diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 7cd2bb60eb..310c55090a 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -36,7 +36,7 @@ | 被团队成员标记为高优先级的功能 | 高优先级 | | 在 [community feedback board](https://github.com/langgenius/dify/discussions/categories/feedbacks) 内反馈的常见功能请求 | 中等优先级 | | 非核心功能和小幅改进 | 低优先级 | - | 有价值当不紧急 | 未来功能 | + | 有价值但不紧急 | 未来功能 | ### 其他任何事情(例如 bug 报告、性能优化、拼写错误更正): * 立即开始编码。 @@ -138,7 +138,7 @@ Dify 的后端使用 Python 编写,使用 [Flask](https://flask.palletsproject ├── models // 描述数据模型和 API 响应的形状 ├── public // 如 favicon 等元资源 ├── service // 定义 API 操作的形状 -├── test +├── test ├── types // 函数参数和返回值的描述 └── utils // 共享的实用函数 ``` diff --git a/api/README.md b/api/README.md index 70ca2e86a8..bab33f9293 100644 --- a/api/README.md +++ b/api/README.md @@ -65,14 +65,12 @@ 8. Start Dify [web](../web) service. 9. Setup your application by visiting `http://localhost:3000`... -10. If you need to debug local async processing, please start the worker service. +10. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. ```bash poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion ``` - The started celery app handles the async tasks, e.g. dataset importing and documents indexing. - ## Testing 1. Install dependencies for both the backend and the test environment diff --git a/api/commands.py b/api/commands.py index 3a6b4963cf..b8fc81af67 100644 --- a/api/commands.py +++ b/api/commands.py @@ -28,28 +28,28 @@ from services.account_service import RegisterService, TenantService @click.command("reset-password", help="Reset the account password.") -@click.option("--email", prompt=True, help="The email address of the account whose password you need to reset") -@click.option("--new-password", prompt=True, help="the new password.") -@click.option("--password-confirm", prompt=True, help="the new password confirm.") +@click.option("--email", prompt=True, help="Account email to reset password for") +@click.option("--new-password", prompt=True, help="New password") +@click.option("--password-confirm", prompt=True, help="Confirm new password") def reset_password(email, new_password, password_confirm): """ Reset password of owner account Only available in SELF_HOSTED mode """ if str(new_password).strip() != str(password_confirm).strip(): - click.echo(click.style("sorry. The two passwords do not match.", fg="red")) + click.echo(click.style("Passwords do not match.", fg="red")) return account = db.session.query(Account).filter(Account.email == email).one_or_none() if not account: - click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red")) + click.echo(click.style("Account not found for email: {}".format(email), fg="red")) return try: valid_password(new_password) except: - click.echo(click.style("sorry. The passwords must match {} ".format(password_pattern), fg="red")) + click.echo(click.style("Invalid password. Must match {}".format(password_pattern), fg="red")) return # generate password salt @@ -62,37 +62,37 @@ def reset_password(email, new_password, password_confirm): account.password = base64_password_hashed account.password_salt = base64_salt db.session.commit() - click.echo(click.style("Congratulations! 
Password has been reset.", fg="green")) + click.echo(click.style("Password reset successfully.", fg="green")) @click.command("reset-email", help="Reset the account email.") -@click.option("--email", prompt=True, help="The old email address of the account whose email you need to reset") -@click.option("--new-email", prompt=True, help="the new email.") -@click.option("--email-confirm", prompt=True, help="the new email confirm.") +@click.option("--email", prompt=True, help="Current account email") +@click.option("--new-email", prompt=True, help="New email") +@click.option("--email-confirm", prompt=True, help="Confirm new email") def reset_email(email, new_email, email_confirm): """ Replace account email :return: """ if str(new_email).strip() != str(email_confirm).strip(): - click.echo(click.style("Sorry, new email and confirm email do not match.", fg="red")) + click.echo(click.style("New emails do not match.", fg="red")) return account = db.session.query(Account).filter(Account.email == email).one_or_none() if not account: - click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red")) + click.echo(click.style("Account not found for email: {}".format(email), fg="red")) return try: email_validate(new_email) except: - click.echo(click.style("sorry. {} is not a valid email. ".format(email), fg="red")) + click.echo(click.style("Invalid email: {}".format(new_email), fg="red")) return account.email = new_email db.session.commit() - click.echo(click.style("Congratulations!, email has been reset.", fg="green")) + click.echo(click.style("Email updated successfully.", fg="green")) @click.command( @@ -104,7 +104,7 @@ def reset_email(email, new_email, email_confirm): ) @click.confirmation_option( prompt=click.style( - "Are you sure you want to reset encrypt key pair? this operation cannot be rolled back!", fg="red" + "Are you sure you want to reset encrypt key pair? This operation cannot be rolled back!", fg="red" ) ) def reset_encrypt_key_pair(): @@ -114,13 +114,13 @@ def reset_encrypt_key_pair(): Only support SELF_HOSTED mode. """ if dify_config.EDITION != "SELF_HOSTED": - click.echo(click.style("Sorry, only support SELF_HOSTED mode.", fg="red")) + click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red")) return tenants = db.session.query(Tenant).all() for tenant in tenants: if not tenant: - click.echo(click.style("Sorry, no workspace found. Please enter /install to initialize.", fg="red")) + click.echo(click.style("No workspaces found. Run /install first.", fg="red")) return tenant.encrypt_public_key = generate_key_pair(tenant.id) @@ -137,7 +137,7 @@ def reset_encrypt_key_pair(): ) -@click.command("vdb-migrate", help="migrate vector db.") +@click.command("vdb-migrate", help="Migrate vector db.") @click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate, Default is All.") def vdb_migrate(scope: str): if scope in {"knowledge", "all"}: @@ -150,7 +150,7 @@ def migrate_annotation_vector_database(): """ Migrate annotation datas to target vector database . """ - click.echo(click.style("Start migrate annotation data.", fg="green")) + click.echo(click.style("Starting annotation data migration.", fg="green")) create_count = 0 skipped_count = 0 total_count = 0 @@ -174,14 +174,14 @@ def migrate_annotation_vector_database(): f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped." 
) try: - click.echo("Create app annotation index: {}".format(app.id)) + click.echo("Creating app annotation index: {}".format(app.id)) app_annotation_setting = ( db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app.id).first() ) if not app_annotation_setting: skipped_count = skipped_count + 1 - click.echo("App annotation setting is disabled: {}".format(app.id)) + click.echo("App annotation setting disabled: {}".format(app.id)) continue # get dataset_collection_binding info dataset_collection_binding = ( @@ -190,7 +190,7 @@ def migrate_annotation_vector_database(): .first() ) if not dataset_collection_binding: - click.echo("App annotation collection binding is not exist: {}".format(app.id)) + click.echo("App annotation collection binding not found: {}".format(app.id)) continue annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app.id).all() dataset = Dataset( @@ -211,11 +211,11 @@ def migrate_annotation_vector_database(): documents.append(document) vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"]) - click.echo(f"Start to migrate annotation, app_id: {app.id}.") + click.echo(f"Migrating annotations for app: {app.id}.") try: vector.delete() - click.echo(click.style(f"Successfully delete vector index for app: {app.id}.", fg="green")) + click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green")) except Exception as e: click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red")) raise e @@ -223,12 +223,12 @@ def migrate_annotation_vector_database(): try: click.echo( click.style( - f"Start to created vector index with {len(documents)} annotations for app {app.id}.", + f"Creating vector index with {len(documents)} annotations for app {app.id}.", fg="green", ) ) vector.create(documents) - click.echo(click.style(f"Successfully created vector index for app {app.id}.", fg="green")) + click.echo(click.style(f"Created vector index for app {app.id}.", fg="green")) except Exception as e: click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red")) raise e @@ -237,14 +237,14 @@ def migrate_annotation_vector_database(): except Exception as e: click.echo( click.style( - "Create app annotation index error: {} {}".format(e.__class__.__name__, str(e)), fg="red" + "Error creating app annotation index: {} {}".format(e.__class__.__name__, str(e)), fg="red" ) ) continue click.echo( click.style( - f"Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.", + f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.", fg="green", ) ) @@ -254,7 +254,7 @@ def migrate_knowledge_vector_database(): """ Migrate vector database datas to target vector database . """ - click.echo(click.style("Start migrate vector db.", fg="green")) + click.echo(click.style("Starting vector database migration.", fg="green")) create_count = 0 skipped_count = 0 total_count = 0 @@ -278,7 +278,7 @@ def migrate_knowledge_vector_database(): f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped." 
) try: - click.echo("Create dataset vdb index: {}".format(dataset.id)) + click.echo("Creating dataset vector database index: {}".format(dataset.id)) if dataset.index_struct_dict: if dataset.index_struct_dict["type"] == vector_type: skipped_count = skipped_count + 1 @@ -299,7 +299,7 @@ def migrate_knowledge_vector_database(): if dataset_collection_binding: collection_name = dataset_collection_binding.collection_name else: - raise ValueError("Dataset Collection Bindings is not exist!") + raise ValueError("Dataset Collection Binding not found") else: dataset_id = dataset.id collection_name = Dataset.gen_collection_name_by_id(dataset_id) @@ -351,14 +351,12 @@ def migrate_knowledge_vector_database(): raise ValueError(f"Vector store {vector_type} is not supported.") vector = Vector(dataset) - click.echo(f"Start to migrate dataset {dataset.id}.") + click.echo(f"Migrating dataset {dataset.id}.") try: vector.delete() click.echo( - click.style( - f"Successfully delete vector index {collection_name} for dataset {dataset.id}.", fg="green" - ) + click.style(f"Deleted vector index {collection_name} for dataset {dataset.id}.", fg="green") ) except Exception as e: click.echo( @@ -410,15 +408,13 @@ def migrate_knowledge_vector_database(): try: click.echo( click.style( - f"Start to created vector index with {len(documents)} documents of {segments_count}" + f"Creating vector index with {len(documents)} documents of {segments_count}" f" segments for dataset {dataset.id}.", fg="green", ) ) vector.create(documents) - click.echo( - click.style(f"Successfully created vector index for dataset {dataset.id}.", fg="green") - ) + click.echo(click.style(f"Created vector index for dataset {dataset.id}.", fg="green")) except Exception as e: click.echo(click.style(f"Failed to created vector index for dataset {dataset.id}.", fg="red")) raise e @@ -429,13 +425,13 @@ def migrate_knowledge_vector_database(): except Exception as e: db.session.rollback() click.echo( - click.style("Create dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red") + click.style("Error creating dataset index: {} {}".format(e.__class__.__name__, str(e)), fg="red") ) continue click.echo( click.style( - f"Congratulations! Create {create_count} dataset indexes, and skipped {skipped_count} datasets.", fg="green" + f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets.", fg="green" ) ) @@ -445,7 +441,7 @@ def convert_to_agent_apps(): """ Convert Agent Assistant to Agent App. """ - click.echo(click.style("Start convert to agent apps.", fg="green")) + click.echo(click.style("Starting convert to agent apps.", fg="green")) proceeded_app_ids = [] @@ -496,23 +492,23 @@ def convert_to_agent_apps(): except Exception as e: click.echo(click.style("Convert app error: {} {}".format(e.__class__.__name__, str(e)), fg="red")) - click.echo(click.style("Congratulations! Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green")) + click.echo(click.style("Conversion complete. 
Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green")) -@click.command("add-qdrant-doc-id-index", help="add qdrant doc_id index.") -@click.option("--field", default="metadata.doc_id", prompt=False, help="index field , default is metadata.doc_id.") +@click.command("add-qdrant-doc-id-index", help="Add Qdrant doc_id index.") +@click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.") def add_qdrant_doc_id_index(field: str): - click.echo(click.style("Start add qdrant doc_id index.", fg="green")) + click.echo(click.style("Starting Qdrant doc_id index creation.", fg="green")) vector_type = dify_config.VECTOR_STORE if vector_type != "qdrant": - click.echo(click.style("Sorry, only support qdrant vector store.", fg="red")) + click.echo(click.style("This command only supports Qdrant vector store.", fg="red")) return create_count = 0 try: bindings = db.session.query(DatasetCollectionBinding).all() if not bindings: - click.echo(click.style("Sorry, no dataset collection bindings found.", fg="red")) + click.echo(click.style("No dataset collection bindings found.", fg="red")) return import qdrant_client from qdrant_client.http.exceptions import UnexpectedResponse @@ -522,7 +518,7 @@ def add_qdrant_doc_id_index(field: str): for binding in bindings: if dify_config.QDRANT_URL is None: - raise ValueError("Qdrant url is required.") + raise ValueError("Qdrant URL is required.") qdrant_config = QdrantConfig( endpoint=dify_config.QDRANT_URL, api_key=dify_config.QDRANT_API_KEY, @@ -539,41 +535,39 @@ def add_qdrant_doc_id_index(field: str): except UnexpectedResponse as e: # Collection does not exist, so return if e.status_code == 404: - click.echo( - click.style(f"Collection not found, collection_name:{binding.collection_name}.", fg="red") - ) + click.echo(click.style(f"Collection not found: {binding.collection_name}.", fg="red")) continue # Some other error occurred, so re-raise the exception else: click.echo( click.style( - f"Failed to create qdrant index, collection_name:{binding.collection_name}.", fg="red" + f"Failed to create Qdrant index for collection: {binding.collection_name}.", fg="red" ) ) except Exception as e: - click.echo(click.style("Failed to create qdrant client.", fg="red")) + click.echo(click.style("Failed to create Qdrant client.", fg="red")) - click.echo(click.style(f"Congratulations! Create {create_count} collection indexes.", fg="green")) + click.echo(click.style(f"Index creation complete. 
Created {create_count} collection indexes.", fg="green"))
 
 
 @click.command("create-tenant", help="Create account and tenant.")
-@click.option("--email", prompt=True, help="The email address of the tenant account.")
-@click.option("--name", prompt=True, help="The workspace name of the tenant account.")
+@click.option("--email", prompt=True, help="Tenant account email.")
+@click.option("--name", prompt=True, help="Workspace name.")
 @click.option("--language", prompt=True, help="Account language, default: en-US.")
 def create_tenant(email: str, language: Optional[str] = None, name: Optional[str] = None):
     """
     Create tenant account
     """
     if not email:
-        click.echo(click.style("Sorry, email is required.", fg="red"))
+        click.echo(click.style("Email is required.", fg="red"))
         return
 
     # Create account
     email = email.strip()
 
     if "@" not in email:
-        click.echo(click.style("Sorry, invalid email address.", fg="red"))
+        click.echo(click.style("Invalid email address.", fg="red"))
         return
 
     account_name = email.split("@")[0]
@@ -593,19 +587,19 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str
 
     click.echo(
         click.style(
-            "Congratulations! Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password),
+            "Account and tenant created.\nAccount: {}\nPassword: {}".format(email, new_password),
             fg="green",
         )
     )
 
 
-@click.command("upgrade-db", help="upgrade the database")
+@click.command("upgrade-db", help="Upgrade the database")
 def upgrade_db():
     click.echo("Preparing database migration...")
     lock = redis_client.lock(name="db_upgrade_lock", timeout=60)
     if lock.acquire(blocking=False):
         try:
-            click.echo(click.style("Start database migration.", fg="green"))
+            click.echo(click.style("Starting database migration.", fg="green"))
 
             # run db migration
             import flask_migrate
@@ -615,7 +609,7 @@ def upgrade_db():
 
             click.echo(click.style("Database migration successful!", fg="green"))
         except Exception as e:
-            logging.exception(f"Database migration failed, error: {e}")
+            logging.exception(f"Database migration failed: {e}")
         finally:
             lock.release()
     else:
@@ -627,7 +621,7 @@ def fix_app_site_missing():
     """
     Fix app related site missing issue.
     """
-    click.echo(click.style("Start fix app related site missing issue.", fg="green"))
+    click.echo(click.style("Starting fix for missing app-related sites.", fg="green"))
 
     failed_app_ids = []
     while True:
@@ -650,22 +644,22 @@ where sites.id is null limit 1000"""
                 if tenant:
                     accounts = tenant.get_accounts()
                     if not accounts:
-                        print("Fix app {} failed.".format(app.id))
+                        print("Fix failed for app {}".format(app.id))
                         continue
 
                     account = accounts[0]
-                    print("Fix app {} related site missing issue.".format(app.id))
+                    print("Fixing missing site for app {}".format(app.id))
                     app_was_created.send(app, account=account)
             except Exception as e:
                 failed_app_ids.append(app_id)
-                click.echo(click.style("Fix app {} related site missing issue failed!".format(app_id), fg="red"))
+                click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
                 logging.exception(f"Fix app related site missing issue failed, error: {e}")
                 continue
 
         if not processed_count:
            break
 
-    click.echo(click.style("Congratulations! 
Fix app related site missing issue successful!", fg="green")) + click.echo(click.style("Fix for missing app-related sites completed successfully!", fg="green")) def register_commands(app): diff --git a/api/configs/deploy/__init__.py b/api/configs/deploy/__init__.py index 10271483c4..66d6a55b4c 100644 --- a/api/configs/deploy/__init__.py +++ b/api/configs/deploy/__init__.py @@ -4,30 +4,30 @@ from pydantic_settings import BaseSettings class DeploymentConfig(BaseSettings): """ - Deployment configs + Configuration settings for application deployment """ APPLICATION_NAME: str = Field( - description="application name", + description="Name of the application, used for identification and logging purposes", default="langgenius/dify", ) DEBUG: bool = Field( - description="whether to enable debug mode.", + description="Enable debug mode for additional logging and development features", default=False, ) TESTING: bool = Field( - description="", + description="Enable testing mode for running automated tests", default=False, ) EDITION: str = Field( - description="deployment edition", + description="Deployment edition of the application (e.g., 'SELF_HOSTED', 'CLOUD')", default="SELF_HOSTED", ) DEPLOY_ENV: str = Field( - description="deployment environment, default to PRODUCTION.", + description="Deployment environment (e.g., 'PRODUCTION', 'DEVELOPMENT'), default to PRODUCTION", default="PRODUCTION", ) diff --git a/api/configs/enterprise/__init__.py b/api/configs/enterprise/__init__.py index c661593a44..eda6345e14 100644 --- a/api/configs/enterprise/__init__.py +++ b/api/configs/enterprise/__init__.py @@ -4,17 +4,17 @@ from pydantic_settings import BaseSettings class EnterpriseFeatureConfig(BaseSettings): """ - Enterprise feature configs. + Configuration for enterprise-level features. **Before using, please contact business@dify.ai by email to inquire about licensing matters.** """ ENTERPRISE_ENABLED: bool = Field( - description="whether to enable enterprise features." + description="Enable or disable enterprise-level features." "Before using, please contact business@dify.ai by email to inquire about licensing matters.", default=False, ) CAN_REPLACE_LOGO: bool = Field( - description="whether to allow replacing enterprise logo.", + description="Allow customization of the enterprise logo.", default=False, ) diff --git a/api/configs/extra/notion_config.py b/api/configs/extra/notion_config.py index bd1268fa45..f9c4d73463 100644 --- a/api/configs/extra/notion_config.py +++ b/api/configs/extra/notion_config.py @@ -6,30 +6,31 @@ from pydantic_settings import BaseSettings class NotionConfig(BaseSettings): """ - Notion integration configs + Configuration settings for Notion integration """ NOTION_CLIENT_ID: Optional[str] = Field( - description="Notion client ID", + description="Client ID for Notion API authentication. Required for OAuth 2.0 flow.", default=None, ) NOTION_CLIENT_SECRET: Optional[str] = Field( - description="Notion client secret key", + description="Client secret for Notion API authentication. Required for OAuth 2.0 flow.", default=None, ) NOTION_INTEGRATION_TYPE: Optional[str] = Field( - description="Notion integration type, default to None, available values: internal.", + description="Type of Notion integration." + " Set to 'internal' for internal integrations, or None for public integrations.", default=None, ) NOTION_INTERNAL_SECRET: Optional[str] = Field( - description="Notion internal secret key", + description="Secret key for internal Notion integrations. 
Required when NOTION_INTEGRATION_TYPE is 'internal'.", default=None, ) NOTION_INTEGRATION_TOKEN: Optional[str] = Field( - description="Notion integration token", + description="Integration token for Notion API access. Used for direct API calls without OAuth flow.", default=None, ) diff --git a/api/configs/extra/sentry_config.py b/api/configs/extra/sentry_config.py index ea9ea60ffb..f76a6bdb95 100644 --- a/api/configs/extra/sentry_config.py +++ b/api/configs/extra/sentry_config.py @@ -6,20 +6,23 @@ from pydantic_settings import BaseSettings class SentryConfig(BaseSettings): """ - Sentry configs + Configuration settings for Sentry error tracking and performance monitoring """ SENTRY_DSN: Optional[str] = Field( - description="Sentry DSN", + description="Sentry Data Source Name (DSN)." + " This is the unique identifier of your Sentry project, used to send events to the correct project.", default=None, ) SENTRY_TRACES_SAMPLE_RATE: NonNegativeFloat = Field( - description="Sentry trace sample rate", + description="Sample rate for Sentry performance monitoring traces." + " Value between 0.0 and 1.0, where 1.0 means 100% of traces are sent to Sentry.", default=1.0, ) SENTRY_PROFILES_SAMPLE_RATE: NonNegativeFloat = Field( - description="Sentry profiles sample rate", + description="Sample rate for Sentry profiling." + " Value between 0.0 and 1.0, where 1.0 means 100% of profiles are sent to Sentry.", default=1.0, ) diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index f794552c36..9218d529cc 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -8,145 +8,143 @@ from configs.feature.hosted_service import HostedServiceConfig class SecurityConfig(BaseSettings): """ - Secret Key configs + Security-related configurations for the application """ SECRET_KEY: Optional[str] = Field( - description="Your App secret key will be used for securely signing the session cookie" + description="Secret key for secure session cookie signing." "Make sure you are changing this key for your deployment with a strong key." - "You can generate a strong key using `openssl rand -base64 42`." 
-        "Alternatively you can set it with `SECRET_KEY` environment variable.",
+        "Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.",
         default=None,
     )
 
     RESET_PASSWORD_TOKEN_EXPIRY_HOURS: PositiveInt = Field(
-        description="Expiry time in hours for reset token",
+        description="Duration in hours for which a password reset token remains valid",
         default=24,
     )
 
 
 class AppExecutionConfig(BaseSettings):
     """
-    App Execution configs
+    Configuration parameters for application execution
     """
 
     APP_MAX_EXECUTION_TIME: PositiveInt = Field(
-        description="execution timeout in seconds for app execution",
+        description="Maximum allowed execution time for the application in seconds",
         default=1200,
     )
     APP_MAX_ACTIVE_REQUESTS: NonNegativeInt = Field(
-        description="max active request per app, 0 means unlimited",
+        description="Maximum number of concurrent active requests per app (0 for unlimited)",
         default=0,
     )
 
 
 class CodeExecutionSandboxConfig(BaseSettings):
     """
-    Code Execution Sandbox configs
+    Configuration for the code execution sandbox environment
     """
 
     CODE_EXECUTION_ENDPOINT: HttpUrl = Field(
-        description="endpoint URL of code execution service",
+        description="URL endpoint for the code execution service",
         default="http://sandbox:8194",
     )
 
     CODE_EXECUTION_API_KEY: str = Field(
-        description="API key for code execution service",
+        description="API key for accessing the code execution service",
         default="dify-sandbox",
     )
 
     CODE_EXECUTION_CONNECT_TIMEOUT: Optional[float] = Field(
-        description="connect timeout in seconds for code execution request",
+        description="Connection timeout in seconds for code execution requests",
         default=10.0,
     )
 
     CODE_EXECUTION_READ_TIMEOUT: Optional[float] = Field(
-        description="read timeout in seconds for code execution request",
+        description="Read timeout in seconds for code execution requests",
         default=60.0,
     )
 
     CODE_EXECUTION_WRITE_TIMEOUT: Optional[float] = Field(
-        description="write timeout in seconds for code execution request",
+        description="Write timeout in seconds for code execution requests",
         default=10.0,
     )
 
     CODE_MAX_NUMBER: PositiveInt = Field(
-        description="max depth for code execution",
+        description="Maximum allowed numeric value in code execution",
         default=9223372036854775807,
     )
 
     CODE_MIN_NUMBER: NegativeInt = Field(
-        description="",
+        description="Minimum allowed numeric value in code execution",
         default=-9223372036854775807,
     )
 
     CODE_MAX_DEPTH: PositiveInt = Field(
-        description="max depth for code execution",
+        description="Maximum allowed depth for nested structures in code execution",
         default=5,
     )
 
     CODE_MAX_PRECISION: PositiveInt = Field(
-        description="max precision digits for float type in code execution",
+        description="Maximum number of decimal places for floating-point numbers in code execution",
         default=20,
     )
 
     CODE_MAX_STRING_LENGTH: PositiveInt = Field(
-        description="max string length for code execution",
+        description="Maximum allowed length for strings in code execution",
         default=80000,
     )
 
     CODE_MAX_STRING_ARRAY_LENGTH: PositiveInt = Field(
-        description="",
+        description="Maximum allowed length for string arrays in code execution",
         default=30,
     )
 
     CODE_MAX_OBJECT_ARRAY_LENGTH: PositiveInt = Field(
-        description="",
+        description="Maximum allowed length for object arrays in code execution",
         default=30,
     )
 
     CODE_MAX_NUMBER_ARRAY_LENGTH: PositiveInt = Field(
-        description="",
+        description="Maximum allowed length for numeric arrays in code execution",
         default=1000,
     )
 
 
 class EndpointConfig(BaseSettings):
     """
-    Module URL 
configs + Configuration for various application endpoints and URLs """ CONSOLE_API_URL: str = Field( - description="The backend URL prefix of the console API." - "used to concatenate the login authorization callback or notion integration callback.", + description="Base URL for the console API," + "used for login authentication callback or notion integration callbacks", default="", ) CONSOLE_WEB_URL: str = Field( - description="The front-end URL prefix of the console web." - "used to concatenate some front-end addresses and for CORS configuration use.", + description="Base URL for the console web interface," "used for frontend references and CORS configuration", default="", ) SERVICE_API_URL: str = Field( - description="Service API Url prefix. used to display Service API Base Url to the front-end.", + description="Base URL for the service API, displayed to users for API access", default="", ) APP_WEB_URL: str = Field( - description="WebApp Url prefix. used to display WebAPP API Base Url to the front-end.", + description="Base URL for the web application, used for frontend references", default="", ) class FileAccessConfig(BaseSettings): """ - File Access configs + Configuration for file access and handling """ FILES_URL: str = Field( - description="File preview or download Url prefix." - " used to display File preview or download Url to the front-end or as Multi-model inputs;" + description="Base URL for file preview or download," + " used for frontend display and multi-model inputs" "Url is signed and has expiration time.", validation_alias=AliasChoices("FILES_URL", "CONSOLE_API_URL"), alias_priority=1, @@ -154,49 +152,49 @@ class FileAccessConfig(BaseSettings): ) FILES_ACCESS_TIMEOUT: int = Field( - description="timeout in seconds for file accessing", + description="Expiration time in seconds for file access URLs", default=300, ) class FileUploadConfig(BaseSettings): """ - File Uploading configs + Configuration for file upload limitations """ UPLOAD_FILE_SIZE_LIMIT: NonNegativeInt = Field( - description="size limit in Megabytes for uploading files", + description="Maximum allowed file size for uploads in megabytes", default=15, ) UPLOAD_FILE_BATCH_LIMIT: NonNegativeInt = Field( - description="batch size limit for uploading files", + description="Maximum number of files allowed in a single upload batch", default=5, ) UPLOAD_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field( - description="image file size limit in Megabytes for uploading files", + description="Maximum allowed image file size for uploads in megabytes", default=10, ) BATCH_UPLOAD_LIMIT: NonNegativeInt = Field( - description="", # todo: to be clarified + description="Maximum number of files allowed in a batch upload operation", default=20, ) class HttpConfig(BaseSettings): """ - HTTP configs + HTTP-related configurations for the application """ API_COMPRESSION_ENABLED: bool = Field( - description="whether to enable HTTP response compression of gzip", + description="Enable or disable gzip compression for HTTP responses", default=False, ) inner_CONSOLE_CORS_ALLOW_ORIGINS: str = Field( - description="", + description="Comma-separated list of allowed origins for CORS in the console", validation_alias=AliasChoices("CONSOLE_CORS_ALLOW_ORIGINS", "CONSOLE_WEB_URL"), default="", ) @@ -218,359 +216,360 @@ class HttpConfig(BaseSettings): return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",") HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[ - PositiveInt, Field(ge=10, description="connect timeout in seconds for HTTP request") + PositiveInt, 
Field(ge=10, description="Maximum connection timeout in seconds for HTTP requests") ] = 10 HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[ - PositiveInt, Field(ge=60, description="read timeout in seconds for HTTP request") + PositiveInt, Field(ge=60, description="Maximum read timeout in seconds for HTTP requests") ] = 60 HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[ - PositiveInt, Field(ge=10, description="read timeout in seconds for HTTP request") + PositiveInt, Field(ge=10, description="Maximum write timeout in seconds for HTTP requests") ] = 20 HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field( - description="", + description="Maximum allowed size in bytes for binary data in HTTP requests", default=10 * 1024 * 1024, ) HTTP_REQUEST_NODE_MAX_TEXT_SIZE: PositiveInt = Field( - description="", + description="Maximum allowed size in bytes for text data in HTTP requests", default=1 * 1024 * 1024, ) SSRF_PROXY_HTTP_URL: Optional[str] = Field( - description="HTTP URL for SSRF proxy", + description="Proxy URL for HTTP requests to prevent Server-Side Request Forgery (SSRF)", default=None, ) SSRF_PROXY_HTTPS_URL: Optional[str] = Field( - description="HTTPS URL for SSRF proxy", + description="Proxy URL for HTTPS requests to prevent Server-Side Request Forgery (SSRF)", default=None, ) class InnerAPIConfig(BaseSettings): """ - Inner API configs + Configuration for internal API functionality """ INNER_API: bool = Field( - description="whether to enable the inner API", + description="Enable or disable the internal API", default=False, ) INNER_API_KEY: Optional[str] = Field( - description="The inner API key is used to authenticate the inner API", + description="API key for accessing the internal API", default=None, ) class LoggingConfig(BaseSettings): """ - Logging configs + Configuration for application logging """ LOG_LEVEL: str = Field( - description="Log output level, default to INFO. It is recommended to set it to ERROR for production.", + description="Logging level, default to INFO. 
Set to ERROR for production environments.",
         default="INFO",
     )
 
     LOG_FILE: Optional[str] = Field(
-        description="logging output file path",
+        description="File path for log output.",
         default=None,
     )
 
     LOG_FORMAT: str = Field(
-        description="log format",
+        description="Format string for log messages",
         default="%(asctime)s.%(msecs)03d %(levelname)s [%(threadName)s] [%(filename)s:%(lineno)d] - %(message)s",
     )
 
     LOG_DATEFORMAT: Optional[str] = Field(
-        description="log date format",
+        description="Date format string for log timestamps",
         default=None,
     )
 
     LOG_TZ: Optional[str] = Field(
-        description="specify log timezone, eg: America/New_York",
+        description="Timezone for log timestamps (e.g., 'America/New_York')",
         default=None,
     )
 
 
 class ModelLoadBalanceConfig(BaseSettings):
     """
-    Model load balance configs
+    Configuration for model load balancing
     """
 
     MODEL_LB_ENABLED: bool = Field(
-        description="whether to enable model load balancing",
+        description="Enable or disable load balancing for models",
         default=False,
     )
 
 
 class BillingConfig(BaseSettings):
     """
-    Platform Billing Configurations
+    Configuration for platform billing features
     """
 
     BILLING_ENABLED: bool = Field(
-        description="whether to enable billing",
+        description="Enable or disable billing functionality",
         default=False,
     )
 
 
 class UpdateConfig(BaseSettings):
     """
-    Update configs
+    Configuration for application update checks
     """
 
     CHECK_UPDATE_URL: str = Field(
-        description="url for checking updates",
+        description="URL to check for application updates",
         default="https://updates.dify.ai",
     )
 
 
 class WorkflowConfig(BaseSettings):
     """
-    Workflow feature configs
+    Configuration for workflow execution
     """
 
     WORKFLOW_MAX_EXECUTION_STEPS: PositiveInt = Field(
-        description="max execution steps in single workflow execution",
+        description="Maximum number of steps allowed in a single workflow execution",
         default=500,
     )
 
     WORKFLOW_MAX_EXECUTION_TIME: PositiveInt = Field(
-        description="max execution time in seconds in single workflow execution",
+        description="Maximum execution time in seconds for a single workflow",
         default=1200,
     )
 
     WORKFLOW_CALL_MAX_DEPTH: PositiveInt = Field(
-        description="max depth of calling in single workflow execution",
+        description="Maximum allowed depth for nested workflow calls",
         default=5,
     )
 
     MAX_VARIABLE_SIZE: PositiveInt = Field(
-        description="The maximum size in bytes of a variable. default to 5KB.",
+        description="Maximum size in bytes for a single variable in workflows. Default to 5KB.",
         default=5 * 1024,
     )
 
 
 class OAuthConfig(BaseSettings):
     """
-    oauth configs
+    Configuration for OAuth authentication
     """
 
     OAUTH_REDIRECT_PATH: str = Field(
-        description="redirect path for OAuth",
+        description="Redirect path for OAuth authentication callbacks",
         default="/console/api/oauth/authorize",
     )
 
     GITHUB_CLIENT_ID: Optional[str] = Field(
-        description="GitHub client id for OAuth",
+        description="GitHub OAuth client ID",
         default=None,
     )
 
     GITHUB_CLIENT_SECRET: Optional[str] = Field(
-        description="GitHub client secret key for OAuth",
+        description="GitHub OAuth client secret",
         default=None,
     )
 
     GOOGLE_CLIENT_ID: Optional[str] = Field(
-        description="Google client id for OAuth",
+        description="Google OAuth client ID",
         default=None,
     )
 
     GOOGLE_CLIENT_SECRET: Optional[str] = Field(
-        description="Google client secret key for OAuth",
+        description="Google OAuth client secret",
         default=None,
     )
 
 
 class ModerationConfig(BaseSettings):
     """
-    Moderation in app configs. 
+ Configuration for content moderation """ MODERATION_BUFFER_SIZE: PositiveInt = Field( - description="buffer size for moderation", + description="Size of the buffer for content moderation processing", default=300, ) class ToolConfig(BaseSettings): """ - Tool configs + Configuration for tool management """ TOOL_ICON_CACHE_MAX_AGE: PositiveInt = Field( - description="max age in seconds for tool icon caching", + description="Maximum age in seconds for caching tool icons", default=3600, ) class MailConfig(BaseSettings): """ - Mail Configurations + Configuration for email services """ MAIL_TYPE: Optional[str] = Field( - description="Mail provider type name, default to None, available values are `smtp` and `resend`.", + description="Email service provider type ('smtp' or 'resend'), default to None.", default=None, ) MAIL_DEFAULT_SEND_FROM: Optional[str] = Field( - description="default email address for sending from ", + description="Default email address to use as the sender", default=None, ) RESEND_API_KEY: Optional[str] = Field( - description="API key for Resend", + description="API key for Resend email service", default=None, ) RESEND_API_URL: Optional[str] = Field( - description="API URL for Resend", + description="API URL for Resend email service", default=None, ) SMTP_SERVER: Optional[str] = Field( - description="smtp server host", + description="SMTP server hostname", default=None, ) SMTP_PORT: Optional[int] = Field( - description="smtp server port", + description="SMTP server port number", default=465, ) SMTP_USERNAME: Optional[str] = Field( - description="smtp server username", + description="Username for SMTP authentication", default=None, ) SMTP_PASSWORD: Optional[str] = Field( - description="smtp server password", + description="Password for SMTP authentication", default=None, ) SMTP_USE_TLS: bool = Field( - description="whether to use TLS connection to smtp server", + description="Enable TLS encryption for SMTP connections", default=False, ) SMTP_OPPORTUNISTIC_TLS: bool = Field( - description="whether to use opportunistic TLS connection to smtp server", + description="Enable opportunistic TLS for SMTP connections", default=False, ) class RagEtlConfig(BaseSettings): """ - RAG ETL Configurations. + Configuration for RAG ETL processes """ ETL_TYPE: str = Field( - description="RAG ETL type name, default to `dify`, available values are `dify` and `Unstructured`. 
", + description="RAG ETL type ('dify' or 'Unstructured'), default to 'dify'", default="dify", ) KEYWORD_DATA_SOURCE_TYPE: str = Field( - description="source type for keyword data, default to `database`, available values are `database` .", + description="Data source type for keyword extraction" + " ('database' or other supported types), default to 'database'", default="database", ) UNSTRUCTURED_API_URL: Optional[str] = Field( - description="API URL for Unstructured", + description="API URL for Unstructured.io service", default=None, ) UNSTRUCTURED_API_KEY: Optional[str] = Field( - description="API key for Unstructured", + description="API key for Unstructured.io service", default=None, ) class DataSetConfig(BaseSettings): """ - Dataset configs + Configuration for dataset management """ CLEAN_DAY_SETTING: PositiveInt = Field( - description="interval in days for cleaning up dataset", + description="Interval in days for dataset cleanup operations", default=30, ) DATASET_OPERATOR_ENABLED: bool = Field( - description="whether to enable dataset operator", + description="Enable or disable dataset operator functionality", default=False, ) class WorkspaceConfig(BaseSettings): """ - Workspace configs + Configuration for workspace management """ INVITE_EXPIRY_HOURS: PositiveInt = Field( - description="workspaces invitation expiration in hours", + description="Expiration time in hours for workspace invitation links", default=72, ) class IndexingConfig(BaseSettings): """ - Indexing configs. + Configuration for indexing operations """ INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: PositiveInt = Field( - description="max segmentation token length for indexing", + description="Maximum token length for text segmentation during indexing", default=1000, ) class ImageFormatConfig(BaseSettings): MULTIMODAL_SEND_IMAGE_FORMAT: str = Field( - description="multi model send image format, support base64, url, default is base64", + description="Format for sending images in multimodal contexts ('base64' or 'url'), default is base64", default="base64", ) class CeleryBeatConfig(BaseSettings): CELERY_BEAT_SCHEDULER_TIME: int = Field( - description="the time of the celery scheduler, default to 1 day", + description="Interval in days for Celery Beat scheduler execution, default to 1 day", default=1, ) class PositionConfig(BaseSettings): POSITION_PROVIDER_PINS: str = Field( - description="The heads of model providers", + description="Comma-separated list of pinned model providers", default="", ) POSITION_PROVIDER_INCLUDES: str = Field( - description="The included model providers", + description="Comma-separated list of included model providers", default="", ) POSITION_PROVIDER_EXCLUDES: str = Field( - description="The excluded model providers", + description="Comma-separated list of excluded model providers", default="", ) POSITION_TOOL_PINS: str = Field( - description="The heads of tools", + description="Comma-separated list of pinned tools", default="", ) POSITION_TOOL_INCLUDES: str = Field( - description="The included tools", + description="Comma-separated list of included tools", default="", ) POSITION_TOOL_EXCLUDES: str = Field( - description="The excluded tools", + description="Comma-separated list of excluded tools", default="", ) diff --git a/api/configs/feature/hosted_service/__init__.py b/api/configs/feature/hosted_service/__init__.py index f269d0ab9c..7f103be8f4 100644 --- a/api/configs/feature/hosted_service/__init__.py +++ b/api/configs/feature/hosted_service/__init__.py @@ -6,31 +6,31 @@ from pydantic_settings 
import BaseSettings class HostedOpenAiConfig(BaseSettings): """ - Hosted OpenAI service config + Configuration for hosted OpenAI service """ HOSTED_OPENAI_API_KEY: Optional[str] = Field( - description="", + description="API key for hosted OpenAI service", default=None, ) HOSTED_OPENAI_API_BASE: Optional[str] = Field( - description="", + description="Base URL for hosted OpenAI API", default=None, ) HOSTED_OPENAI_API_ORGANIZATION: Optional[str] = Field( - description="", + description="Organization ID for hosted OpenAI service", default=None, ) HOSTED_OPENAI_TRIAL_ENABLED: bool = Field( - description="", + description="Enable trial access to hosted OpenAI service", default=False, ) HOSTED_OPENAI_TRIAL_MODELS: str = Field( - description="", + description="Comma-separated list of available models for trial access", default="gpt-3.5-turbo," "gpt-3.5-turbo-1106," "gpt-3.5-turbo-instruct," @@ -42,17 +42,17 @@ class HostedOpenAiConfig(BaseSettings): ) HOSTED_OPENAI_QUOTA_LIMIT: NonNegativeInt = Field( - description="", + description="Quota limit for hosted OpenAI service usage", default=200, ) HOSTED_OPENAI_PAID_ENABLED: bool = Field( - description="", + description="Enable paid access to hosted OpenAI service", default=False, ) HOSTED_OPENAI_PAID_MODELS: str = Field( - description="", + description="Comma-separated list of available models for paid access", default="gpt-4," "gpt-4-turbo-preview," "gpt-4-turbo-2024-04-09," @@ -71,124 +71,122 @@ class HostedOpenAiConfig(BaseSettings): class HostedAzureOpenAiConfig(BaseSettings): """ - Hosted OpenAI service config + Configuration for hosted Azure OpenAI service """ HOSTED_AZURE_OPENAI_ENABLED: bool = Field( - description="", + description="Enable hosted Azure OpenAI service", default=False, ) HOSTED_AZURE_OPENAI_API_KEY: Optional[str] = Field( - description="", + description="API key for hosted Azure OpenAI service", default=None, ) HOSTED_AZURE_OPENAI_API_BASE: Optional[str] = Field( - description="", + description="Base URL for hosted Azure OpenAI API", default=None, ) HOSTED_AZURE_OPENAI_QUOTA_LIMIT: NonNegativeInt = Field( - description="", + description="Quota limit for hosted Azure OpenAI service usage", default=200, ) class HostedAnthropicConfig(BaseSettings): """ - Hosted Azure OpenAI service config + Configuration for hosted Anthropic service """ HOSTED_ANTHROPIC_API_BASE: Optional[str] = Field( - description="", + description="Base URL for hosted Anthropic API", default=None, ) HOSTED_ANTHROPIC_API_KEY: Optional[str] = Field( - description="", + description="API key for hosted Anthropic service", default=None, ) HOSTED_ANTHROPIC_TRIAL_ENABLED: bool = Field( - description="", + description="Enable trial access to hosted Anthropic service", default=False, ) HOSTED_ANTHROPIC_QUOTA_LIMIT: NonNegativeInt = Field( - description="", + description="Quota limit for hosted Anthropic service usage", default=600000, ) HOSTED_ANTHROPIC_PAID_ENABLED: bool = Field( - description="", + description="Enable paid access to hosted Anthropic service", default=False, ) class HostedMinmaxConfig(BaseSettings): """ - Hosted Minmax service config + Configuration for hosted Minmax service """ HOSTED_MINIMAX_ENABLED: bool = Field( - description="", + description="Enable hosted Minmax service", default=False, ) class HostedSparkConfig(BaseSettings): """ - Hosted Spark service config + Configuration for hosted Spark service """ HOSTED_SPARK_ENABLED: bool = Field( - description="", + description="Enable hosted Spark service", default=False, ) class 
HostedZhipuAIConfig(BaseSettings): """ - Hosted Minmax service config + Configuration for hosted ZhipuAI service """ HOSTED_ZHIPUAI_ENABLED: bool = Field( - description="", + description="Enable hosted ZhipuAI service", default=False, ) class HostedModerationConfig(BaseSettings): """ - Hosted Moderation service config + Configuration for hosted Moderation service """ HOSTED_MODERATION_ENABLED: bool = Field( - description="", + description="Enable hosted Moderation service", default=False, ) HOSTED_MODERATION_PROVIDERS: str = Field( - description="", + description="Comma-separated list of moderation providers", default="", ) class HostedFetchAppTemplateConfig(BaseSettings): """ - Hosted Moderation service config + Configuration for fetching app templates """ HOSTED_FETCH_APP_TEMPLATES_MODE: str = Field( - description="the mode for fetching app templates," - " default to remote," - " available values: remote, db, builtin", + description="Mode for fetching app templates: remote, db, or builtin" " default to remote,", default="remote", ) HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN: str = Field( - description="the domain for fetching remote app templates", + description="Domain for fetching remote app templates", default="https://tmpl.dify.ai", ) diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index e017c2c5b8..6ad216c191 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -31,70 +31,71 @@ from configs.middleware.vdb.weaviate_config import WeaviateConfig class StorageConfig(BaseSettings): STORAGE_TYPE: str = Field( - description="storage type," - " default to `local`," - " available values are `local`, `s3`, `azure-blob`, `aliyun-oss`, `google-storage`.", + description="Type of storage to use." + " Options: 'local', 's3', 'azure-blob', 'aliyun-oss', 'google-storage'. Default is 'local'.", default="local", ) STORAGE_LOCAL_PATH: str = Field( - description="local storage path", + description="Path for local storage when STORAGE_TYPE is set to 'local'.", default="storage", ) class VectorStoreConfig(BaseSettings): VECTOR_STORE: Optional[str] = Field( - description="vector store type", + description="Type of vector store to use for efficient similarity search." + " Set to None if not using a vector store.", default=None, ) class KeywordStoreConfig(BaseSettings): KEYWORD_STORE: str = Field( - description="keyword store type", + description="Method for keyword extraction and storage." + " Default is 'jieba', a Chinese text segmentation library.", default="jieba", ) class DatabaseConfig: DB_HOST: str = Field( - description="db host", + description="Hostname or IP address of the database server.", default="localhost", ) DB_PORT: PositiveInt = Field( - description="db port", + description="Port number for database connection.", default=5432, ) DB_USERNAME: str = Field( - description="db username", + description="Username for database authentication.", default="postgres", ) DB_PASSWORD: str = Field( - description="db password", + description="Password for database authentication.", default="", ) DB_DATABASE: str = Field( - description="db database", + description="Name of the database to connect to.", default="dify", ) DB_CHARSET: str = Field( - description="db charset", + description="Character set for database connection.", default="", ) DB_EXTRAS: str = Field( - description="db extras options. Example: keepalives_idle=60&keepalives=1", + description="Additional database connection parameters. 
Example: 'keepalives_idle=60&keepalives=1'", default="", ) SQLALCHEMY_DATABASE_URI_SCHEME: str = Field( - description="db uri scheme", + description="Database URI scheme for SQLAlchemy connection.", default="postgresql", ) @@ -112,27 +113,27 @@ class DatabaseConfig: ) SQLALCHEMY_POOL_SIZE: NonNegativeInt = Field( - description="pool size of SqlAlchemy", + description="Maximum number of database connections in the pool.", default=30, ) SQLALCHEMY_MAX_OVERFLOW: NonNegativeInt = Field( - description="max overflows for SqlAlchemy", + description="Maximum number of connections that can be created beyond the pool_size.", default=10, ) SQLALCHEMY_POOL_RECYCLE: NonNegativeInt = Field( - description="SqlAlchemy pool recycle", + description="Number of seconds after which a connection is automatically recycled.", default=3600, ) SQLALCHEMY_POOL_PRE_PING: bool = Field( - description="whether to enable pool pre-ping in SqlAlchemy", + description="If True, enables connection pool pre-ping feature to check connections.", default=False, ) SQLALCHEMY_ECHO: bool | str = Field( - description="whether to enable SqlAlchemy echo", + description="If True, SQLAlchemy will log all SQL statements.", default=False, ) @@ -150,27 +151,27 @@ class DatabaseConfig: class CeleryConfig(DatabaseConfig): CELERY_BACKEND: str = Field( - description="Celery backend, available values are `database`, `redis`", + description="Backend for Celery task results. Options: 'database', 'redis'.", default="database", ) CELERY_BROKER_URL: Optional[str] = Field( - description="CELERY_BROKER_URL", + description="URL of the message broker for Celery tasks.", default=None, ) CELERY_USE_SENTINEL: Optional[bool] = Field( - description="Whether to use Redis Sentinel mode", + description="Whether to use Redis Sentinel for high availability.", default=False, ) CELERY_SENTINEL_MASTER_NAME: Optional[str] = Field( - description="Redis Sentinel master name", + description="Name of the Redis Sentinel master.", default=None, ) CELERY_SENTINEL_SOCKET_TIMEOUT: Optional[PositiveFloat] = Field( - description="Redis Sentinel socket timeout", + description="Timeout for Redis Sentinel socket operations in seconds.", default=0.1, ) diff --git a/api/configs/middleware/cache/redis_config.py b/api/configs/middleware/cache/redis_config.py index 4fcd52ddc9..26b9b1347c 100644 --- a/api/configs/middleware/cache/redis_config.py +++ b/api/configs/middleware/cache/redis_config.py @@ -6,65 +6,65 @@ from pydantic_settings import BaseSettings class RedisConfig(BaseSettings): """ - Redis configs + Configuration settings for Redis connection """ REDIS_HOST: str = Field( - description="Redis host", + description="Hostname or IP address of the Redis server", default="localhost", ) REDIS_PORT: PositiveInt = Field( - description="Redis port", + description="Port number on which the Redis server is listening", default=6379, ) REDIS_USERNAME: Optional[str] = Field( - description="Redis username", + description="Username for Redis authentication (if required)", default=None, ) REDIS_PASSWORD: Optional[str] = Field( - description="Redis password", + description="Password for Redis authentication (if required)", default=None, ) REDIS_DB: NonNegativeInt = Field( - description="Redis database id, default to 0", + description="Redis database number to use (0-15)", default=0, ) REDIS_USE_SSL: bool = Field( - description="whether to use SSL for Redis connection", + description="Enable SSL/TLS for the Redis connection", default=False, ) REDIS_USE_SENTINEL: Optional[bool] = Field( - 
description="Whether to use Redis Sentinel mode", + description="Enable Redis Sentinel mode for high availability", default=False, ) REDIS_SENTINELS: Optional[str] = Field( - description="Redis Sentinel nodes", + description="Comma-separated list of Redis Sentinel nodes (host:port)", default=None, ) REDIS_SENTINEL_SERVICE_NAME: Optional[str] = Field( - description="Redis Sentinel service name", + description="Name of the Redis Sentinel service to monitor", default=None, ) REDIS_SENTINEL_USERNAME: Optional[str] = Field( - description="Redis Sentinel username", + description="Username for Redis Sentinel authentication (if required)", default=None, ) REDIS_SENTINEL_PASSWORD: Optional[str] = Field( - description="Redis Sentinel password", + description="Password for Redis Sentinel authentication (if required)", default=None, ) REDIS_SENTINEL_SOCKET_TIMEOUT: Optional[PositiveFloat] = Field( - description="Redis Sentinel socket timeout", + description="Socket timeout in seconds for Redis Sentinel connections", default=0.1, ) diff --git a/api/configs/middleware/storage/aliyun_oss_storage_config.py b/api/configs/middleware/storage/aliyun_oss_storage_config.py index c1843dc26c..07eb527170 100644 --- a/api/configs/middleware/storage/aliyun_oss_storage_config.py +++ b/api/configs/middleware/storage/aliyun_oss_storage_config.py @@ -6,40 +6,40 @@ from pydantic_settings import BaseSettings class AliyunOSSStorageConfig(BaseSettings): """ - Aliyun storage configs + Configuration settings for Aliyun Object Storage Service (OSS) """ ALIYUN_OSS_BUCKET_NAME: Optional[str] = Field( - description="Aliyun OSS bucket name", + description="Name of the Aliyun OSS bucket to store and retrieve objects", default=None, ) ALIYUN_OSS_ACCESS_KEY: Optional[str] = Field( - description="Aliyun OSS access key", + description="Access key ID for authenticating with Aliyun OSS", default=None, ) ALIYUN_OSS_SECRET_KEY: Optional[str] = Field( - description="Aliyun OSS secret key", + description="Secret access key for authenticating with Aliyun OSS", default=None, ) ALIYUN_OSS_ENDPOINT: Optional[str] = Field( - description="Aliyun OSS endpoint URL", + description="URL of the Aliyun OSS endpoint for your chosen region", default=None, ) ALIYUN_OSS_REGION: Optional[str] = Field( - description="Aliyun OSS region", + description="Aliyun OSS region where your bucket is located (e.g., 'oss-cn-hangzhou')", default=None, ) ALIYUN_OSS_AUTH_VERSION: Optional[str] = Field( - description="Aliyun OSS authentication version", + description="Version of the authentication protocol to use with Aliyun OSS (e.g., 'v4')", default=None, ) ALIYUN_OSS_PATH: Optional[str] = Field( - description="Aliyun OSS path", + description="Base path within the bucket to store objects (e.g., 'my-app-data/')", default=None, ) diff --git a/api/configs/middleware/storage/amazon_s3_storage_config.py b/api/configs/middleware/storage/amazon_s3_storage_config.py index bef9326108..f2d94b12ff 100644 --- a/api/configs/middleware/storage/amazon_s3_storage_config.py +++ b/api/configs/middleware/storage/amazon_s3_storage_config.py @@ -6,40 +6,40 @@ from pydantic_settings import BaseSettings class S3StorageConfig(BaseSettings): """ - S3 storage configs + Configuration settings for S3-compatible object storage """ S3_ENDPOINT: Optional[str] = Field( - description="S3 storage endpoint", + description="URL of the S3-compatible storage endpoint (e.g., 'https://s3.amazonaws.com')", default=None, ) S3_REGION: Optional[str] = Field( - description="S3 storage region", + description="Region 
where the S3 bucket is located (e.g., 'us-east-1')", default=None, ) S3_BUCKET_NAME: Optional[str] = Field( - description="S3 storage bucket name", + description="Name of the S3 bucket to store and retrieve objects", default=None, ) S3_ACCESS_KEY: Optional[str] = Field( - description="S3 storage access key", + description="Access key ID for authenticating with the S3 service", default=None, ) S3_SECRET_KEY: Optional[str] = Field( - description="S3 storage secret key", + description="Secret access key for authenticating with the S3 service", default=None, ) S3_ADDRESS_STYLE: str = Field( - description="S3 storage address style", + description="S3 addressing style: 'auto', 'path', or 'virtual'", default="auto", ) S3_USE_AWS_MANAGED_IAM: bool = Field( - description="whether to use aws managed IAM for S3", + description="Use AWS managed IAM roles for authentication instead of access/secret keys", default=False, ) diff --git a/api/configs/middleware/storage/azure_blob_storage_config.py b/api/configs/middleware/storage/azure_blob_storage_config.py index 10944b58ed..b7ab5247a9 100644 --- a/api/configs/middleware/storage/azure_blob_storage_config.py +++ b/api/configs/middleware/storage/azure_blob_storage_config.py @@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings class AzureBlobStorageConfig(BaseSettings): """ - Azure Blob storage configs + Configuration settings for Azure Blob Storage """ AZURE_BLOB_ACCOUNT_NAME: Optional[str] = Field( - description="Azure Blob account name", + description="Name of the Azure Storage account (e.g., 'mystorageaccount')", default=None, ) AZURE_BLOB_ACCOUNT_KEY: Optional[str] = Field( - description="Azure Blob account key", + description="Access key for authenticating with the Azure Storage account", default=None, ) AZURE_BLOB_CONTAINER_NAME: Optional[str] = Field( - description="Azure Blob container name", + description="Name of the Azure Blob container to store and retrieve objects", default=None, ) AZURE_BLOB_ACCOUNT_URL: Optional[str] = Field( - description="Azure Blob account URL", + description="URL of the Azure Blob storage endpoint (e.g., 'https://mystorageaccount.blob.core.windows.net')", default=None, ) diff --git a/api/configs/middleware/storage/google_cloud_storage_config.py b/api/configs/middleware/storage/google_cloud_storage_config.py index 10a2d97e8d..e5d763d7f5 100644 --- a/api/configs/middleware/storage/google_cloud_storage_config.py +++ b/api/configs/middleware/storage/google_cloud_storage_config.py @@ -6,15 +6,15 @@ from pydantic_settings import BaseSettings class GoogleCloudStorageConfig(BaseSettings): """ - Google Cloud storage configs + Configuration settings for Google Cloud Storage """ GOOGLE_STORAGE_BUCKET_NAME: Optional[str] = Field( - description="Google Cloud storage bucket name", + description="Name of the Google Cloud Storage bucket to store and retrieve objects (e.g., 'my-gcs-bucket')", default=None, ) GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: Optional[str] = Field( - description="Google Cloud storage service account json base64", + description="Base64-encoded JSON key file for Google Cloud service account authentication", default=None, ) diff --git a/api/configs/middleware/storage/huawei_obs_storage_config.py b/api/configs/middleware/storage/huawei_obs_storage_config.py index c5cb379cae..3e9e7543ab 100644 --- a/api/configs/middleware/storage/huawei_obs_storage_config.py +++ b/api/configs/middleware/storage/huawei_obs_storage_config.py @@ -5,25 +5,25 @@ from pydantic import BaseModel, Field class 
HuaweiCloudOBSStorageConfig(BaseModel): """ - Huawei Cloud OBS storage configs + Configuration settings for Huawei Cloud Object Storage Service (OBS) """ HUAWEI_OBS_BUCKET_NAME: Optional[str] = Field( - description="Huawei Cloud OBS bucket name", + description="Name of the Huawei Cloud OBS bucket to store and retrieve objects (e.g., 'my-obs-bucket')", default=None, ) HUAWEI_OBS_ACCESS_KEY: Optional[str] = Field( - description="Huawei Cloud OBS Access key", + description="Access Key ID for authenticating with Huawei Cloud OBS", default=None, ) HUAWEI_OBS_SECRET_KEY: Optional[str] = Field( - description="Huawei Cloud OBS Secret key", + description="Secret Access Key for authenticating with Huawei Cloud OBS", default=None, ) HUAWEI_OBS_SERVER: Optional[str] = Field( - description="Huawei Cloud OBS server URL", + description="Endpoint URL for Huawei Cloud OBS (e.g., 'https://obs.cn-north-4.myhuaweicloud.com')", default=None, ) diff --git a/api/configs/middleware/storage/oci_storage_config.py b/api/configs/middleware/storage/oci_storage_config.py index f8993496c9..edc245bcac 100644 --- a/api/configs/middleware/storage/oci_storage_config.py +++ b/api/configs/middleware/storage/oci_storage_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class OCIStorageConfig(BaseSettings): """ - OCI storage configs + Configuration settings for Oracle Cloud Infrastructure (OCI) Object Storage """ OCI_ENDPOINT: Optional[str] = Field( - description="OCI storage endpoint", + description="URL of the OCI Object Storage endpoint (e.g., 'https://objectstorage.us-phoenix-1.oraclecloud.com')", default=None, ) OCI_REGION: Optional[str] = Field( - description="OCI storage region", + description="OCI region where the bucket is located (e.g., 'us-phoenix-1')", default=None, ) OCI_BUCKET_NAME: Optional[str] = Field( - description="OCI storage bucket name", + description="Name of the OCI Object Storage bucket to store and retrieve objects (e.g., 'my-oci-bucket')", default=None, ) OCI_ACCESS_KEY: Optional[str] = Field( - description="OCI storage access key", + description="Access key (also known as API key) for authenticating with OCI Object Storage", default=None, ) OCI_SECRET_KEY: Optional[str] = Field( - description="OCI storage secret key", + description="Secret key associated with the access key for authenticating with OCI Object Storage", default=None, ) diff --git a/api/configs/middleware/storage/tencent_cos_storage_config.py b/api/configs/middleware/storage/tencent_cos_storage_config.py index 765ac08f3e..255c4e8938 100644 --- a/api/configs/middleware/storage/tencent_cos_storage_config.py +++ b/api/configs/middleware/storage/tencent_cos_storage_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class TencentCloudCOSStorageConfig(BaseSettings): """ - Tencent Cloud COS storage configs + Configuration settings for Tencent Cloud Object Storage (COS) """ TENCENT_COS_BUCKET_NAME: Optional[str] = Field( - description="Tencent Cloud COS bucket name", + description="Name of the Tencent Cloud COS bucket to store and retrieve objects", default=None, ) TENCENT_COS_REGION: Optional[str] = Field( - description="Tencent Cloud COS region", + description="Tencent Cloud region where the COS bucket is located (e.g., 'ap-guangzhou')", default=None, ) TENCENT_COS_SECRET_ID: Optional[str] = Field( - description="Tencent Cloud COS secret id", + description="SecretId for authenticating with Tencent Cloud COS (part of API credentials)", default=None, ) TENCENT_COS_SECRET_KEY: Optional[str] = Field( - 
description="Tencent Cloud COS secret key", + description="SecretKey for authenticating with Tencent Cloud COS (part of API credentials)", default=None, ) TENCENT_COS_SCHEME: Optional[str] = Field( - description="Tencent Cloud COS scheme", + description="Protocol scheme for COS requests: 'https' (recommended) or 'http'", default=None, ) diff --git a/api/configs/middleware/storage/volcengine_tos_storage_config.py b/api/configs/middleware/storage/volcengine_tos_storage_config.py index a0e09a3cc7..89ea885002 100644 --- a/api/configs/middleware/storage/volcengine_tos_storage_config.py +++ b/api/configs/middleware/storage/volcengine_tos_storage_config.py @@ -5,30 +5,30 @@ from pydantic import BaseModel, Field class VolcengineTOSStorageConfig(BaseModel): """ - Volcengine tos storage configs + Configuration settings for Volcengine Tinder Object Storage (TOS) """ VOLCENGINE_TOS_BUCKET_NAME: Optional[str] = Field( - description="Volcengine TOS Bucket Name", + description="Name of the Volcengine TOS bucket to store and retrieve objects (e.g., 'my-tos-bucket')", default=None, ) VOLCENGINE_TOS_ACCESS_KEY: Optional[str] = Field( - description="Volcengine TOS Access Key", + description="Access Key ID for authenticating with Volcengine TOS", default=None, ) VOLCENGINE_TOS_SECRET_KEY: Optional[str] = Field( - description="Volcengine TOS Secret Key", + description="Secret Access Key for authenticating with Volcengine TOS", default=None, ) VOLCENGINE_TOS_ENDPOINT: Optional[str] = Field( - description="Volcengine TOS Endpoint URL", + description="URL of the Volcengine TOS endpoint (e.g., 'https://tos-cn-beijing.volces.com')", default=None, ) VOLCENGINE_TOS_REGION: Optional[str] = Field( - description="Volcengine TOS Region", + description="Volcengine region where the TOS bucket is located (e.g., 'cn-beijing')", default=None, ) diff --git a/api/configs/middleware/vdb/analyticdb_config.py b/api/configs/middleware/vdb/analyticdb_config.py index 04f5b0e5bf..247a8ea555 100644 --- a/api/configs/middleware/vdb/analyticdb_config.py +++ b/api/configs/middleware/vdb/analyticdb_config.py @@ -5,33 +5,38 @@ from pydantic import BaseModel, Field class AnalyticdbConfig(BaseModel): """ - Configuration for connecting to AnalyticDB. + Configuration for connecting to Alibaba Cloud AnalyticDB for PostgreSQL. Refer to the following documentation for details on obtaining credentials: https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/getting-started/create-an-instance-instances-with-vector-engine-optimization-enabled """ ANALYTICDB_KEY_ID: Optional[str] = Field( - default=None, description="The Access Key ID provided by Alibaba Cloud for authentication." + default=None, description="The Access Key ID provided by Alibaba Cloud for API authentication." ) ANALYTICDB_KEY_SECRET: Optional[str] = Field( - default=None, description="The Secret Access Key corresponding to the Access Key ID for secure access." + default=None, description="The Secret Access Key corresponding to the Access Key ID for secure API access." ) ANALYTICDB_REGION_ID: Optional[str] = Field( - default=None, description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou')." 
+ default=None, + description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou', 'ap-southeast-1').", ) ANALYTICDB_INSTANCE_ID: Optional[str] = Field( default=None, - description="The unique identifier of the AnalyticDB instance you want to connect to (e.g., 'gp-ab123456')..", + description="The unique identifier of the AnalyticDB instance you want to connect to.", ) ANALYTICDB_ACCOUNT: Optional[str] = Field( - default=None, description="The account name used to log in to the AnalyticDB instance." + default=None, + description="The account name used to log in to the AnalyticDB instance" + " (usually the initial account created with the instance).", ) ANALYTICDB_PASSWORD: Optional[str] = Field( - default=None, description="The password associated with the AnalyticDB account for authentication." + default=None, description="The password associated with the AnalyticDB account for database authentication." ) ANALYTICDB_NAMESPACE: Optional[str] = Field( - default=None, description="The namespace within AnalyticDB for schema isolation." + default=None, description="The namespace within AnalyticDB for schema isolation (if using namespace feature)." ) ANALYTICDB_NAMESPACE_PASSWORD: Optional[str] = Field( - default=None, description="The password for accessing the specified namespace within the AnalyticDB instance." + default=None, + description="The password for accessing the specified namespace within the AnalyticDB instance" + " (if namespace feature is enabled).", ) diff --git a/api/configs/middleware/vdb/chroma_config.py b/api/configs/middleware/vdb/chroma_config.py index d386623a56..e83a9902de 100644 --- a/api/configs/middleware/vdb/chroma_config.py +++ b/api/configs/middleware/vdb/chroma_config.py @@ -6,35 +6,35 @@ from pydantic_settings import BaseSettings class ChromaConfig(BaseSettings): """ - Chroma configs + Configuration settings for Chroma vector database """ CHROMA_HOST: Optional[str] = Field( - description="Chroma host", + description="Hostname or IP address of the Chroma server (e.g., 'localhost' or '192.168.1.100')", default=None, ) CHROMA_PORT: PositiveInt = Field( - description="Chroma port", + description="Port number on which the Chroma server is listening (default is 8000)", default=8000, ) CHROMA_TENANT: Optional[str] = Field( - description="Chroma database", + description="Tenant identifier for multi-tenancy support in Chroma", default=None, ) CHROMA_DATABASE: Optional[str] = Field( - description="Chroma database", + description="Name of the Chroma database to connect to", default=None, ) CHROMA_AUTH_PROVIDER: Optional[str] = Field( - description="Chroma authentication provider", + description="Authentication provider for Chroma (e.g., 'basic', 'token', or a custom provider)", default=None, ) CHROMA_AUTH_CREDENTIALS: Optional[str] = Field( - description="Chroma authentication credentials", + description="Authentication credentials for Chroma (format depends on the auth provider)", default=None, ) diff --git a/api/configs/middleware/vdb/elasticsearch_config.py b/api/configs/middleware/vdb/elasticsearch_config.py index 5b6a8fd939..df8182985d 100644 --- a/api/configs/middleware/vdb/elasticsearch_config.py +++ b/api/configs/middleware/vdb/elasticsearch_config.py @@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings class ElasticsearchConfig(BaseSettings): """ - Elasticsearch configs + Configuration settings for Elasticsearch """ ELASTICSEARCH_HOST: Optional[str] = Field( - description="Elasticsearch host", + description="Hostname or IP address of 
the Elasticsearch server (e.g., 'localhost' or '192.168.1.100')", default="127.0.0.1", ) ELASTICSEARCH_PORT: PositiveInt = Field( - description="Elasticsearch port", + description="Port number on which the Elasticsearch server is listening (default is 9200)", default=9200, ) ELASTICSEARCH_USERNAME: Optional[str] = Field( - description="Elasticsearch username", + description="Username for authenticating with Elasticsearch (default is 'elastic')", default="elastic", ) ELASTICSEARCH_PASSWORD: Optional[str] = Field( - description="Elasticsearch password", + description="Password for authenticating with Elasticsearch (default is 'elastic')", default="elastic", ) diff --git a/api/configs/middleware/vdb/milvus_config.py b/api/configs/middleware/vdb/milvus_config.py index 98d375966a..231cbbbe8f 100644 --- a/api/configs/middleware/vdb/milvus_config.py +++ b/api/configs/middleware/vdb/milvus_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class MilvusConfig(BaseSettings): """ - Milvus configs + Configuration settings for Milvus vector database """ MILVUS_URI: Optional[str] = Field( - description="Milvus uri", + description="URI for connecting to the Milvus server (e.g., 'http://localhost:19530' or 'https://milvus-instance.example.com:19530')", default="http://127.0.0.1:19530", ) MILVUS_TOKEN: Optional[str] = Field( - description="Milvus token", + description="Authentication token for Milvus, if token-based authentication is enabled", default=None, ) MILVUS_USER: Optional[str] = Field( - description="Milvus user", + description="Username for authenticating with Milvus, if username/password authentication is enabled", default=None, ) MILVUS_PASSWORD: Optional[str] = Field( - description="Milvus password", + description="Password for authenticating with Milvus, if username/password authentication is enabled", default=None, ) MILVUS_DATABASE: str = Field( - description="Milvus database, default to `default`", + description="Name of the Milvus database to connect to (default is 'default')", default="default", ) diff --git a/api/configs/middleware/vdb/myscale_config.py b/api/configs/middleware/vdb/myscale_config.py index 6451d26e1c..5896c19d27 100644 --- a/api/configs/middleware/vdb/myscale_config.py +++ b/api/configs/middleware/vdb/myscale_config.py @@ -3,35 +3,35 @@ from pydantic import BaseModel, Field, PositiveInt class MyScaleConfig(BaseModel): """ - MyScale configs + Configuration settings for MyScale vector database """ MYSCALE_HOST: str = Field( - description="MyScale host", + description="Hostname or IP address of the MyScale server (e.g., 'localhost' or 'myscale.example.com')", default="localhost", ) MYSCALE_PORT: PositiveInt = Field( - description="MyScale port", + description="Port number on which the MyScale server is listening (default is 8123)", default=8123, ) MYSCALE_USER: str = Field( - description="MyScale user", + description="Username for authenticating with MyScale (default is 'default')", default="default", ) MYSCALE_PASSWORD: str = Field( - description="MyScale password", + description="Password for authenticating with MyScale (default is an empty string)", default="", ) MYSCALE_DATABASE: str = Field( - description="MyScale database name", + description="Name of the MyScale database to connect to (default is 'default')", default="default", ) MYSCALE_FTS_PARAMS: str = Field( - description="MyScale fts index parameters", + description="Additional parameters for MyScale Full Text Search index", default="", ) diff --git 
a/api/configs/middleware/vdb/opensearch_config.py b/api/configs/middleware/vdb/opensearch_config.py index 5823dc1433..81dde4c04d 100644 --- a/api/configs/middleware/vdb/opensearch_config.py +++ b/api/configs/middleware/vdb/opensearch_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class OpenSearchConfig(BaseSettings): """ - OpenSearch configs + Configuration settings for OpenSearch """ OPENSEARCH_HOST: Optional[str] = Field( - description="OpenSearch host", + description="Hostname or IP address of the OpenSearch server (e.g., 'localhost' or 'opensearch.example.com')", default=None, ) OPENSEARCH_PORT: PositiveInt = Field( - description="OpenSearch port", + description="Port number on which the OpenSearch server is listening (default is 9200)", default=9200, ) OPENSEARCH_USER: Optional[str] = Field( - description="OpenSearch user", + description="Username for authenticating with OpenSearch", default=None, ) OPENSEARCH_PASSWORD: Optional[str] = Field( - description="OpenSearch password", + description="Password for authenticating with OpenSearch", default=None, ) OPENSEARCH_SECURE: bool = Field( - description="whether to use SSL connection for OpenSearch", + description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)", default=False, ) diff --git a/api/configs/middleware/vdb/oracle_config.py b/api/configs/middleware/vdb/oracle_config.py index 62614ae870..44e2f13345 100644 --- a/api/configs/middleware/vdb/oracle_config.py +++ b/api/configs/middleware/vdb/oracle_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class OracleConfig(BaseSettings): """ - ORACLE configs + Configuration settings for Oracle database """ ORACLE_HOST: Optional[str] = Field( - description="ORACLE host", + description="Hostname or IP address of the Oracle database server (e.g., 'localhost' or 'oracle.example.com')", default=None, ) ORACLE_PORT: Optional[PositiveInt] = Field( - description="ORACLE port", + description="Port number on which the Oracle database server is listening (default is 1521)", default=1521, ) ORACLE_USER: Optional[str] = Field( - description="ORACLE user", + description="Username for authenticating with the Oracle database", default=None, ) ORACLE_PASSWORD: Optional[str] = Field( - description="ORACLE password", + description="Password for authenticating with the Oracle database", default=None, ) ORACLE_DATABASE: Optional[str] = Field( - description="ORACLE database", + description="Name of the Oracle database or service to connect to (e.g., 'ORCL' or 'pdborcl')", default=None, ) diff --git a/api/configs/middleware/vdb/pgvector_config.py b/api/configs/middleware/vdb/pgvector_config.py index 39a7c1d8d5..395dcaa420 100644 --- a/api/configs/middleware/vdb/pgvector_config.py +++ b/api/configs/middleware/vdb/pgvector_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class PGVectorConfig(BaseSettings): """ - PGVector configs + Configuration settings for PGVector (PostgreSQL with vector extension) """ PGVECTOR_HOST: Optional[str] = Field( - description="PGVector host", + description="Hostname or IP address of the PostgreSQL server with PGVector extension (e.g., 'localhost')", default=None, ) PGVECTOR_PORT: Optional[PositiveInt] = Field( - description="PGVector port", + description="Port number on which the PostgreSQL server is listening (default is 5433)", default=5433, ) PGVECTOR_USER: Optional[str] = Field( - description="PGVector user", + description="Username for authenticating with the 
PostgreSQL database", default=None, ) PGVECTOR_PASSWORD: Optional[str] = Field( - description="PGVector password", + description="Password for authenticating with the PostgreSQL database", default=None, ) PGVECTOR_DATABASE: Optional[str] = Field( - description="PGVector database", + description="Name of the PostgreSQL database to connect to", default=None, ) diff --git a/api/configs/middleware/vdb/pgvectors_config.py b/api/configs/middleware/vdb/pgvectors_config.py index c40e5ff921..8d7a4b8d25 100644 --- a/api/configs/middleware/vdb/pgvectors_config.py +++ b/api/configs/middleware/vdb/pgvectors_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class PGVectoRSConfig(BaseSettings): """ - PGVectoRS configs + Configuration settings for PGVecto.RS (Rust-based vector extension for PostgreSQL) """ PGVECTO_RS_HOST: Optional[str] = Field( - description="PGVectoRS host", + description="Hostname or IP address of the PostgreSQL server with PGVecto.RS extension (e.g., 'localhost')", default=None, ) PGVECTO_RS_PORT: Optional[PositiveInt] = Field( - description="PGVectoRS port", + description="Port number on which the PostgreSQL server with PGVecto.RS is listening (default is 5431)", default=5431, ) PGVECTO_RS_USER: Optional[str] = Field( - description="PGVectoRS user", + description="Username for authenticating with the PostgreSQL database using PGVecto.RS", default=None, ) PGVECTO_RS_PASSWORD: Optional[str] = Field( - description="PGVectoRS password", + description="Password for authenticating with the PostgreSQL database using PGVecto.RS", default=None, ) PGVECTO_RS_DATABASE: Optional[str] = Field( - description="PGVectoRS database", + description="Name of the PostgreSQL database with PGVecto.RS extension to connect to", default=None, ) diff --git a/api/configs/middleware/vdb/qdrant_config.py b/api/configs/middleware/vdb/qdrant_config.py index 27f75491c9..b70f624652 100644 --- a/api/configs/middleware/vdb/qdrant_config.py +++ b/api/configs/middleware/vdb/qdrant_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class QdrantConfig(BaseSettings): """ - Qdrant configs + Configuration settings for Qdrant vector database """ QDRANT_URL: Optional[str] = Field( - description="Qdrant url", + description="URL of the Qdrant server (e.g., 'http://localhost:6333' or 'https://qdrant.example.com')", default=None, ) QDRANT_API_KEY: Optional[str] = Field( - description="Qdrant api key", + description="API key for authenticating with the Qdrant server", default=None, ) QDRANT_CLIENT_TIMEOUT: NonNegativeInt = Field( - description="Qdrant client timeout in seconds", + description="Timeout in seconds for Qdrant client operations (default is 20 seconds)", default=20, ) QDRANT_GRPC_ENABLED: bool = Field( - description="whether enable grpc support for Qdrant connection", + description="Whether to enable gRPC support for Qdrant connection (True for gRPC, False for HTTP)", default=False, ) QDRANT_GRPC_PORT: PositiveInt = Field( - description="Qdrant grpc port", + description="Port number for gRPC connection to Qdrant server (default is 6334)", default=6334, ) diff --git a/api/configs/middleware/vdb/relyt_config.py b/api/configs/middleware/vdb/relyt_config.py index 66b9ecc03f..5ffbea7b19 100644 --- a/api/configs/middleware/vdb/relyt_config.py +++ b/api/configs/middleware/vdb/relyt_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class RelytConfig(BaseSettings): """ - Relyt configs + Configuration settings for Relyt database """ RELYT_HOST: Optional[str] = Field( - 
description="Relyt host", + description="Hostname or IP address of the Relyt server (e.g., 'localhost' or 'relyt.example.com')", default=None, ) RELYT_PORT: PositiveInt = Field( - description="Relyt port", + description="Port number on which the Relyt server is listening (default is 9200)", default=9200, ) RELYT_USER: Optional[str] = Field( - description="Relyt user", + description="Username for authenticating with the Relyt database", default=None, ) RELYT_PASSWORD: Optional[str] = Field( - description="Relyt password", + description="Password for authenticating with the Relyt database", default=None, ) RELYT_DATABASE: Optional[str] = Field( - description="Relyt database", + description="Name of the Relyt database to connect to (default is 'default')", default="default", ) diff --git a/api/configs/middleware/vdb/tencent_vector_config.py b/api/configs/middleware/vdb/tencent_vector_config.py index 46b4cb6a24..9cf4d07f6f 100644 --- a/api/configs/middleware/vdb/tencent_vector_config.py +++ b/api/configs/middleware/vdb/tencent_vector_config.py @@ -6,45 +6,45 @@ from pydantic_settings import BaseSettings class TencentVectorDBConfig(BaseSettings): """ - Tencent Vector configs + Configuration settings for Tencent Vector Database """ TENCENT_VECTOR_DB_URL: Optional[str] = Field( - description="Tencent Vector URL", + description="URL of the Tencent Vector Database service (e.g., 'https://vectordb.tencentcloudapi.com')", default=None, ) TENCENT_VECTOR_DB_API_KEY: Optional[str] = Field( - description="Tencent Vector API key", + description="API key for authenticating with the Tencent Vector Database service", default=None, ) TENCENT_VECTOR_DB_TIMEOUT: PositiveInt = Field( - description="Tencent Vector timeout in seconds", + description="Timeout in seconds for Tencent Vector Database operations (default is 30 seconds)", default=30, ) TENCENT_VECTOR_DB_USERNAME: Optional[str] = Field( - description="Tencent Vector username", + description="Username for authenticating with the Tencent Vector Database (if required)", default=None, ) TENCENT_VECTOR_DB_PASSWORD: Optional[str] = Field( - description="Tencent Vector password", + description="Password for authenticating with the Tencent Vector Database (if required)", default=None, ) TENCENT_VECTOR_DB_SHARD: PositiveInt = Field( - description="Tencent Vector sharding number", + description="Number of shards for the Tencent Vector Database (default is 1)", default=1, ) TENCENT_VECTOR_DB_REPLICAS: NonNegativeInt = Field( - description="Tencent Vector replicas", + description="Number of replicas for the Tencent Vector Database (default is 2)", default=2, ) TENCENT_VECTOR_DB_DATABASE: Optional[str] = Field( - description="Tencent Vector Database", + description="Name of the specific Tencent Vector Database to connect to", default=None, ) diff --git a/api/configs/middleware/vdb/tidb_vector_config.py b/api/configs/middleware/vdb/tidb_vector_config.py index dbcb276c01..bc68be69d8 100644 --- a/api/configs/middleware/vdb/tidb_vector_config.py +++ b/api/configs/middleware/vdb/tidb_vector_config.py @@ -6,30 +6,30 @@ from pydantic_settings import BaseSettings class TiDBVectorConfig(BaseSettings): """ - TiDB Vector configs + Configuration settings for TiDB Vector database """ TIDB_VECTOR_HOST: Optional[str] = Field( - description="TiDB Vector host", + description="Hostname or IP address of the TiDB Vector server (e.g., 'localhost' or 'tidb.example.com')", default=None, ) TIDB_VECTOR_PORT: Optional[PositiveInt] = Field( - description="TiDB Vector port", + description="Port 
number on which the TiDB Vector server is listening (default is 4000)", default=4000, ) TIDB_VECTOR_USER: Optional[str] = Field( - description="TiDB Vector user", + description="Username for authenticating with the TiDB Vector database", default=None, ) TIDB_VECTOR_PASSWORD: Optional[str] = Field( - description="TiDB Vector password", + description="Password for authenticating with the TiDB Vector database", default=None, ) TIDB_VECTOR_DATABASE: Optional[str] = Field( - description="TiDB Vector database", + description="Name of the TiDB Vector database to connect to", default=None, ) diff --git a/api/configs/middleware/vdb/weaviate_config.py b/api/configs/middleware/vdb/weaviate_config.py index 63d1022f6a..25000e8bde 100644 --- a/api/configs/middleware/vdb/weaviate_config.py +++ b/api/configs/middleware/vdb/weaviate_config.py @@ -6,25 +6,25 @@ from pydantic_settings import BaseSettings class WeaviateConfig(BaseSettings): """ - Weaviate configs + Configuration settings for Weaviate vector database """ WEAVIATE_ENDPOINT: Optional[str] = Field( - description="Weaviate endpoint URL", + description="URL of the Weaviate server (e.g., 'http://localhost:8080' or 'https://weaviate.example.com')", default=None, ) WEAVIATE_API_KEY: Optional[str] = Field( - description="Weaviate API key", + description="API key for authenticating with the Weaviate server", default=None, ) WEAVIATE_GRPC_ENABLED: bool = Field( - description="whether to enable gRPC for Weaviate connection", + description="Whether to enable gRPC for Weaviate connection (True for gRPC, False for HTTP)", default=True, ) WEAVIATE_BATCH_SIZE: PositiveInt = Field( - description="Weaviate batch size", + description="Number of objects to be processed in a single batch operation (default is 100)", default=100, ) diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index 3815a6fca2..c752660122 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings): CURRENT_VERSION: str = Field( description="Dify version", - default="0.8.2", + default="0.8.3", ) COMMIT_SHA: str = Field( diff --git a/api/constants/__init__.py b/api/constants/__init__.py index e22c3268ef..75eaf81638 100644 --- a/api/constants/__init__.py +++ b/api/constants/__init__.py @@ -1 +1,2 @@ HIDDEN_VALUE = "[__HIDDEN__]" +UUID_NIL = "00000000-0000-0000-0000-000000000000" diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py index 53de51c24d..d3296d3dff 100644 --- a/api/controllers/console/app/completion.py +++ b/api/controllers/console/app/completion.py @@ -109,6 +109,7 @@ class ChatMessageApi(Resource): parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("model_config", type=dict, required=True, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json") args = parser.parse_args() diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py index fe06201982..2fba3e0af0 100644 --- a/api/controllers/console/app/message.py +++ b/api/controllers/console/app/message.py @@ -105,8 +105,6 @@ class ChatMessageListApi(Resource): if rest_count > 0: has_more = True 
- history_messages = list(reversed(history_messages)) - return InfiniteScrollPagination(data=history_messages, limit=args["limit"], has_more=has_more) diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index b488deb89d..0a693b84e2 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -166,6 +166,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource): parser.add_argument("query", type=str, required=True, location="json", default="") parser.add_argument("files", type=list, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") + args = parser.parse_args() try: diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py index f464692098..125bc1af8c 100644 --- a/api/controllers/console/explore/completion.py +++ b/api/controllers/console/explore/completion.py @@ -100,6 +100,7 @@ class ChatApi(InstalledAppResource): parser.add_argument("query", type=str, required=True, location="json") parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json") args = parser.parse_args() diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index 0e0238556c..3d221ff30a 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -51,7 +51,7 @@ class MessageListApi(InstalledAppResource): try: return MessageService.pagination_by_first_id( - app_model, current_user, args["conversation_id"], args["first_id"], args["limit"] + app_model, current_user, args["conversation_id"], args["first_id"], args["limit"], "desc" ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index e54e6f4903..a70ee89b5e 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -54,6 +54,7 @@ class MessageListApi(Resource): message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), diff --git a/api/controllers/web/completion.py b/api/controllers/web/completion.py index 115492b796..45b890dfc4 100644 --- a/api/controllers/web/completion.py +++ b/api/controllers/web/completion.py @@ -96,6 +96,7 @@ class ChatApi(WebApiResource): parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("retriever_from", type=str, required=False, default="web_app", location="json") args = parser.parse_args() diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 0d4047f4ef..2d2a5866c8 100644 --- a/api/controllers/web/message.py +++ 
b/api/controllers/web/message.py @@ -57,6 +57,7 @@ class MessageListApi(WebApiResource): message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), @@ -89,7 +90,7 @@ class MessageListApi(WebApiResource): try: return MessageService.pagination_by_first_id( - app_model, end_user, args["conversation_id"], args["first_id"], args["limit"] + app_model, end_user, args["conversation_id"], args["first_id"], args["limit"], "desc" ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index d09a9956a4..5295f97bdb 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -32,6 +32,7 @@ from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.model_entities import ModelFeature from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.utils.encoders import jsonable_encoder +from core.prompt.utils.extract_thread_messages import extract_thread_messages from core.tools.entities.tool_entities import ( ToolParameter, ToolRuntimeVariablePool, @@ -441,10 +442,12 @@ class BaseAgentRunner(AppRunner): .filter( Message.conversation_id == self.message.conversation_id, ) - .order_by(Message.created_at.asc()) + .order_by(Message.created_at.desc()) .all() ) + messages = list(reversed(extract_thread_messages(messages))) + for message in messages: if message.id == self.message.id: continue diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 88e1256ed5..445ef6d0ab 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -121,6 +121,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py index abf8a332ab..99abccf4f9 100644 --- a/api/core/app/apps/agent_chat/app_generator.py +++ b/api/core/app/apps/agent_chat/app_generator.py @@ -127,6 +127,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py index 032556ec4c..9ef1366a0f 100644 --- a/api/core/app/apps/chat/app_generator.py +++ b/api/core/app/apps/chat/app_generator.py @@ -128,6 +128,7 @@ class ChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index c4db95cbd0..65b759acf5 100644 --- 
a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -218,6 +218,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): answer_tokens=0, answer_unit_price=0, answer_price_unit=0, + parent_message_id=getattr(application_generate_entity, "parent_message_id", None), provider_response_latency=0, total_price=0, currency="USD", diff --git a/api/core/app/entities/app_invoke_entities.py b/api/core/app/entities/app_invoke_entities.py index ab8d4e374e..87ca51ef1b 100644 --- a/api/core/app/entities/app_invoke_entities.py +++ b/api/core/app/entities/app_invoke_entities.py @@ -122,6 +122,7 @@ class ChatAppGenerateEntity(EasyUIBasedAppGenerateEntity): """ conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None class CompletionAppGenerateEntity(EasyUIBasedAppGenerateEntity): @@ -138,6 +139,7 @@ class AgentChatAppGenerateEntity(EasyUIBasedAppGenerateEntity): """ conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None class AdvancedChatAppGenerateEntity(AppGenerateEntity): @@ -149,6 +151,7 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity): app_config: WorkflowUIBasedAppConfig conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None query: str class SingleIterationRunEntity(BaseModel): diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 78a6d6e683..39bd6fee69 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -47,6 +47,8 @@ class LLMGenerator: ) answer = response.message.content cleaned_answer = re.sub(r"^.*(\{.*\}).*$", r"\1", answer, flags=re.DOTALL) + if cleaned_answer is None: + return "" result_dict = json.loads(cleaned_answer) answer = result_dict["Your Output"] name = answer.strip() diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index d3185c3b11..60b36c50f0 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -11,6 +11,7 @@ from core.model_runtime.entities.message_entities import ( TextPromptMessageContent, UserPromptMessage, ) +from core.prompt.utils.extract_thread_messages import extract_thread_messages from extensions.ext_database import db from models.model import AppMode, Conversation, Message, MessageFile from models.workflow import WorkflowRun @@ -33,8 +34,17 @@ class TokenBufferMemory: # fetch limited messages, and return reversed query = ( - db.session.query(Message.id, Message.query, Message.answer, Message.created_at, Message.workflow_run_id) - .filter(Message.conversation_id == self.conversation.id, Message.answer != "") + db.session.query( + Message.id, + Message.query, + Message.answer, + Message.created_at, + Message.workflow_run_id, + Message.parent_message_id, + ) + .filter( + Message.conversation_id == self.conversation.id, + ) .order_by(Message.created_at.desc()) ) @@ -45,7 +55,12 @@ class TokenBufferMemory: messages = query.limit(message_limit).all() - messages = list(reversed(messages)) + # instead of all messages from the conversation, we only need to extract messages + # that belong to the thread of last message + thread_messages = extract_thread_messages(messages) + thread_messages.pop(0) + messages = list(reversed(thread_messages)) + message_file_parser = MessageFileParser(tenant_id=app_record.tenant_id, app_id=app_record.id) prompt_messages = [] for message in messages: diff --git a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md 
b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md index 56f379a92f..17fc088a63 100644 --- a/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md +++ b/api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md @@ -62,7 +62,7 @@ pricing: # 价格信息 建议将所有模型配置都准备完毕后再开始模型代码的实现。 -同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#AIModel)。 +同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#aimodelentity)。 ### 实现模型调用代码 diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml index d10314ba03..1f5f64019a 100644 --- a/api/core/model_runtime/model_providers/_position.yaml +++ b/api/core/model_runtime/model_providers/_position.yaml @@ -37,3 +37,4 @@ - siliconflow - perfxcloud - zhinao +- fireworks diff --git a/api/core/model_runtime/model_providers/fireworks/__init__.py b/api/core/model_runtime/model_providers/fireworks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg new file mode 100644 index 0000000000..582605cc42 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg new file mode 100644 index 0000000000..86eeba66f9 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/api/core/model_runtime/model_providers/fireworks/_common.py b/api/core/model_runtime/model_providers/fireworks/_common.py new file mode 100644 index 0000000000..378ced3a40 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_common.py @@ -0,0 +1,52 @@ +from collections.abc import Mapping + +import openai + +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class _CommonFireworks: + def _to_credential_kwargs(self, credentials: Mapping) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = { + "api_key": credentials["fireworks_api_key"], + "base_url": "https://api.fireworks.ai/inference/v1", + "max_retries": 1, + } + + return credentials_kwargs + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. 
+ + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError], + InvokeServerUnavailableError: [openai.InternalServerError], + InvokeRateLimitError: [openai.RateLimitError], + InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError], + InvokeBadRequestError: [ + openai.BadRequestError, + openai.NotFoundError, + openai.UnprocessableEntityError, + openai.APIError, + ], + } diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.py b/api/core/model_runtime/model_providers/fireworks/fireworks.py new file mode 100644 index 0000000000..15f25badab --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/fireworks.py @@ -0,0 +1,27 @@ +import logging + +from core.model_runtime.entities.model_entities import ModelType +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.model_provider import ModelProvider + +logger = logging.getLogger(__name__) + + +class FireworksProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + model_instance.validate_credentials( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", credentials=credentials + ) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") + raise ex diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml new file mode 100644 index 0000000000..f886fa23b5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml @@ -0,0 +1,29 @@ +provider: fireworks +label: + zh_Hans: Fireworks AI + en_US: Fireworks AI +icon_small: + en_US: icon_s_en.svg +icon_large: + en_US: icon_l_en.svg +background: "#FCFDFF" +help: + title: + en_US: Get your API Key from Fireworks AI + zh_Hans: 从 Fireworks AI 获取 API Key + url: + en_US: https://fireworks.ai/account/api-keys +supported_model_types: + - llm +configurate_methods: + - predefined-model +provider_credential_schema: + credential_form_schemas: + - variable: fireworks_api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/fireworks/llm/__init__.py b/api/core/model_runtime/model_providers/fireworks/llm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml new file mode 100644 index 0000000000..9f7c1af68c --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml @@ -0,0 +1,16 @@ +- llama-v3p1-405b-instruct +- llama-v3p1-70b-instruct +- llama-v3p1-8b-instruct +- llama-v3-70b-instruct +- mixtral-8x22b-instruct +- mixtral-8x7b-instruct +- firefunction-v2 +- firefunction-v1 +- gemma2-9b-it +- llama-v3-70b-instruct-hf +- llama-v3-8b-instruct +- llama-v3-8b-instruct-hf +- mixtral-8x7b-instruct-hf +- mythomax-l2-13b +- phi-3-vision-128k-instruct +- yi-large diff --git 
a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml new file mode 100644 index 0000000000..f6bac12832 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/firefunction-v1 +label: + zh_Hans: Firefunction V1 + en_US: Firefunction V1 +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml new file mode 100644 index 0000000000..2979cb46d5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/firefunction-v2 +label: + zh_Hans: Firefunction V2 + en_US: Firefunction V2 +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml new file mode 100644 index 0000000000..ee41a7e2fd --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml @@ -0,0 +1,45 @@ +model: accounts/fireworks/models/gemma2-9b-it +label: + zh_Hans: Gemma2 9B Instruct + en_US: Gemma2 9B Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml new file mode 100644 index 0000000000..2ae89b8816 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-70b-instruct-hf +label: + zh_Hans: Llama3 70B Instruct(HF version) + en_US: Llama3 70B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml new file mode 100644 index 0000000000..7c24b08ca5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-70b-instruct +label: + zh_Hans: Llama3 70B Instruct + en_US: Llama3 70B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml new file mode 100644 index 0000000000..83507ef3e5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-8b-instruct-hf +label: + zh_Hans: Llama3 8B Instruct(HF version) + en_US: Llama3 8B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml new file mode 100644 index 0000000000..d8ac9537b8 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-8b-instruct +label: + zh_Hans: Llama3 8B Instruct + en_US: Llama3 8B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml new file mode 100644 index 0000000000..c4ddb3e924 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p1-405b-instruct +label: + zh_Hans: Llama3.1 405B Instruct + en_US: Llama3.1 405B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '3' + output: '3' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml new file mode 100644 index 0000000000..62f84f87fa --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p1-70b-instruct +label: + zh_Hans: Llama3.1 70B Instruct + en_US: Llama3.1 70B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.2'
+  output: '0.2'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
new file mode 100644
index 0000000000..9bb99c91b6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-8b-instruct
+label:
+  zh_Hans: Llama3.1 8B Instruct
+  en_US: Llama3.1 8B Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+  - name: max_tokens
+    use_template: max_tokens
+  - name: context_length_exceeded_behavior
+    default: None
+    label:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    help:
+      zh_Hans: 上下文长度超出行为
+      en_US: Context Length Exceeded Behavior
+    type: string
+    options:
+      - None
+      - truncate
+      - error
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.2'
+  output: '0.2'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llm.py b/api/core/model_runtime/model_providers/fireworks/llm/llm.py
new file mode 100644
index 0000000000..2dcf1adba6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llm.py
@@ -0,0 +1,610 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    ImagePromptMessageContent,
+    PromptMessage,
+    PromptMessageContentType,
+    PromptMessageTool,
+    SystemPromptMessage,
+    TextPromptMessageContent,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+logger = logging.getLogger(__name__)
+
+FIREWORKS_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object can be found in the instructions, use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+ + +{{instructions}} + +""" # noqa: E501 + + +class FireworksLargeLanguageModel(_CommonFireworks, LargeLanguageModel): + """ + Model class for Fireworks large language model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + + return self._chat_generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def _code_block_mode_wrapper( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + callbacks: Optional[list[Callback]] = None, + ) -> Union[LLMResult, Generator]: + """ + Code block mode wrapper for invoking large language model + """ + if "response_format" in model_parameters and model_parameters["response_format"] in {"JSON", "XML"}: + stop = stop or [] + self._transform_chat_json_prompts( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + response_format=model_parameters["response_format"], + ) + model_parameters.pop("response_format") + + return self._invoke( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def _transform_chat_json_prompts( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + response_format: str = "JSON", + ) -> None: + """ + Transform json prompts + """ + if stop is None: + stop = [] + if "```\n" not in stop: + stop.append("```\n") + if "\n```" not in stop: + stop.append("\n```") + + if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage): + prompt_messages[0] = SystemPromptMessage( + content=FIREWORKS_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace( + "{{block}}", response_format + ) + ) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n")) + else: + prompt_messages.insert( + 0, + SystemPromptMessage( + content=FIREWORKS_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", f"Please output a valid {response_format} object." 
+ ).replace("{{block}}", response_format) + ), + ) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}")) + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: + """ + return self._num_tokens_from_messages(model, prompt_messages, tools) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + client.chat.completions.create( + messages=[{"role": "user", "content": "ping"}], model=model, temperature=0, max_tokens=10, stream=False + ) + except Exception as e: + raise CredentialsValidateFailedError(str(e)) + + def _chat_generate( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + + if tools: + extra_model_kwargs["functions"] = [ + {"name": tool.name, "description": tool.description, "parameters": tool.parameters} for tool in tools + ] + + if stop: + extra_model_kwargs["stop"] = stop + + if user: + extra_model_kwargs["user"] = user + + # chat model + response = client.chat.completions.create( + messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages], + model=model, + stream=stream, + **model_parameters, + **extra_model_kwargs, + ) + + if stream: + return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools) + return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools) + + def _handle_chat_generate_response( + self, + model: str, + credentials: dict, + response: ChatCompletion, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> LLMResult: + """ + Handle llm chat response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response + """ + assistant_message = response.choices[0].message + # assistant_message_tool_calls = assistant_message.tool_calls + assistant_message_function_call = assistant_message.function_call + + # extract tool calls from response + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage(content=assistant_message.content, tool_calls=tool_calls) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.prompt_tokens + completion_tokens = response.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = 
self._num_tokens_from_messages(model, prompt_messages, tools) + completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + + # transform response + response = LLMResult( + model=response.model, + prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage, + system_fingerprint=response.system_fingerprint, + ) + + return response + + def _handle_chat_generate_stream_response( + self, + model: str, + credentials: dict, + response: Stream[ChatCompletionChunk], + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> Generator: + """ + Handle llm chat stream response + + :param model: model name + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response chunk generator + """ + full_assistant_content = "" + delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None + prompt_tokens = 0 + completion_tokens = 0 + final_tool_calls = [] + final_chunk = LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=""), + ), + ) + + for chunk in response: + if len(chunk.choices) == 0: + if chunk.usage: + # calculate num tokens + prompt_tokens = chunk.usage.prompt_tokens + completion_tokens = chunk.usage.completion_tokens + continue + + delta = chunk.choices[0] + has_finish_reason = delta.finish_reason is not None + + if ( + not has_finish_reason + and (delta.delta.content is None or delta.delta.content == "") + and delta.delta.function_call is None + ): + continue + + # assistant_message_tool_calls = delta.delta.tool_calls + assistant_message_function_call = delta.delta.function_call + + # extract tool calls from response + if delta_assistant_message_function_call_storage is not None: + # handle process of stream function call + if assistant_message_function_call: + # message has not ended ever + delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments + continue + else: + # message has ended + assistant_message_function_call = delta_assistant_message_function_call_storage + delta_assistant_message_function_call_storage = None + else: + if assistant_message_function_call: + # start of stream function call + delta_assistant_message_function_call_storage = assistant_message_function_call + if delta_assistant_message_function_call_storage.arguments is None: + delta_assistant_message_function_call_storage.arguments = "" + if not has_finish_reason: + continue + + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + if tool_calls: + final_tool_calls.extend(tool_calls) + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage(content=delta.delta.content or "", tool_calls=tool_calls) + + full_assistant_content += delta.delta.content or "" + + if has_finish_reason: + final_chunk = LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + finish_reason=delta.finish_reason, + ), + ) + else: + yield LLMResultChunk( + 
model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + ), + ) + + if not prompt_tokens: + prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools) + + if not completion_tokens: + full_assistant_prompt_message = AssistantPromptMessage( + content=full_assistant_content, tool_calls=final_tool_calls + ) + completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + final_chunk.delta.usage = usage + + yield final_chunk + + def _extract_response_tool_calls( + self, response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall] + ) -> list[AssistantPromptMessage.ToolCall]: + """ + Extract tool calls from response + + :param response_tool_calls: response tool calls + :return: list of tool calls + """ + tool_calls = [] + if response_tool_calls: + for response_tool_call in response_tool_calls: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_tool_call.function.name, arguments=response_tool_call.function.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_tool_call.id, type=response_tool_call.type, function=function + ) + tool_calls.append(tool_call) + + return tool_calls + + def _extract_response_function_call( + self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall + ) -> AssistantPromptMessage.ToolCall: + """ + Extract function call from response + + :param response_function_call: response function call + :return: tool call + """ + tool_call = None + if response_function_call: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_function_call.name, arguments=response_function_call.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_function_call.name, type="function", function=function + ) + + return tool_call + + def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + """ + Convert PromptMessage to dict for Fireworks API + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = {"type": "text", "text": message_content.data} + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + sub_message_dict = { + "type": "image_url", + "image_url": {"url": message_content.data, "detail": message_content.detail.value}, + } + sub_messages.append(sub_message_dict) + + message_dict = {"role": "user", "content": sub_messages} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + # message_dict["tool_calls"] = [tool_call.dict() for tool_call in + # message.tool_calls] + function_call = message.tool_calls[0] + message_dict["function_call"] = { + "name": function_call.function.name, + "arguments": function_call.function.arguments, + } + elif isinstance(message, 
SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, ToolPromptMessage): + message = cast(ToolPromptMessage, message) + # message_dict = { + # "role": "tool", + # "content": message.content, + # "tool_call_id": message.tool_call_id + # } + message_dict = {"role": "function", "content": message.content, "name": message.tool_call_id} + else: + raise ValueError(f"Got unknown type {message}") + + if message.name: + message_dict["name"] = message.name + + return message_dict + + def _num_tokens_from_messages( + self, + model: str, + messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + credentials: dict = None, + ) -> int: + """ + Approximate num tokens with GPT2 tokenizer. + """ + + tokens_per_message = 3 + tokens_per_name = 1 + + num_tokens = 0 + messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages] + for message in messages_dict: + num_tokens += tokens_per_message + for key, value in message.items(): + # Cast str(value) in case the message value is not a string + # This occurs with function messages + # TODO: The current token calculation method for the image type is not implemented, + # which need to download the image and then get the resolution for calculation, + # and will increase the request delay + if isinstance(value, list): + text = "" + for item in value: + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] + + value = text + + if key == "tool_calls": + for tool_call in value: + for t_key, t_value in tool_call.items(): + num_tokens += self._get_num_tokens_by_gpt2(t_key) + if t_key == "function": + for f_key, f_value in t_value.items(): + num_tokens += self._get_num_tokens_by_gpt2(f_key) + num_tokens += self._get_num_tokens_by_gpt2(f_value) + else: + num_tokens += self._get_num_tokens_by_gpt2(t_key) + num_tokens += self._get_num_tokens_by_gpt2(t_value) + else: + num_tokens += self._get_num_tokens_by_gpt2(str(value)) + + if key == "name": + num_tokens += tokens_per_name + + # every reply is primed with assistant + num_tokens += 3 + + if tools: + num_tokens += self._num_tokens_for_tools(tools) + + return num_tokens + + def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int: + """ + Calculate num tokens for tool calling with tiktoken package. 
+ + :param tools: tools for tool calling + :return: number of tokens + """ + num_tokens = 0 + for tool in tools: + num_tokens += self._get_num_tokens_by_gpt2("type") + num_tokens += self._get_num_tokens_by_gpt2("function") + num_tokens += self._get_num_tokens_by_gpt2("function") + + # calculate num tokens for function object + num_tokens += self._get_num_tokens_by_gpt2("name") + num_tokens += self._get_num_tokens_by_gpt2(tool.name) + num_tokens += self._get_num_tokens_by_gpt2("description") + num_tokens += self._get_num_tokens_by_gpt2(tool.description) + parameters = tool.parameters + num_tokens += self._get_num_tokens_by_gpt2("parameters") + if "title" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("title") + num_tokens += self._get_num_tokens_by_gpt2(parameters.get("title")) + num_tokens += self._get_num_tokens_by_gpt2("type") + num_tokens += self._get_num_tokens_by_gpt2(parameters.get("type")) + if "properties" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("properties") + for key, value in parameters.get("properties").items(): + num_tokens += self._get_num_tokens_by_gpt2(key) + for field_key, field_value in value.items(): + num_tokens += self._get_num_tokens_by_gpt2(field_key) + if field_key == "enum": + for enum_field in field_value: + num_tokens += 3 + num_tokens += self._get_num_tokens_by_gpt2(enum_field) + else: + num_tokens += self._get_num_tokens_by_gpt2(field_key) + num_tokens += self._get_num_tokens_by_gpt2(str(field_value)) + if "required" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("required") + for required_field in parameters["required"]: + num_tokens += 3 + num_tokens += self._get_num_tokens_by_gpt2(required_field) + + return num_tokens diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml new file mode 100644 index 0000000000..87d977e26c --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x22b-instruct +label: + zh_Hans: Mixtral MoE 8x22B Instruct + en_US: Mixtral MoE 8x22B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 65536 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '1.2' + output: '1.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml new file mode 100644 index 0000000000..e3d5a90858 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x7b-instruct-hf +label: + zh_Hans: Mixtral MoE 8x7B Instruct(HF version) + en_US: Mixtral MoE 8x7B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml new file mode 100644 index 0000000000..45f632ceff --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x7b-instruct +label: + zh_Hans: Mixtral MoE 8x7B Instruct + en_US: Mixtral MoE 8x7B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml new file mode 100644 index 0000000000..9c3486ba10 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mythomax-l2-13b +label: + zh_Hans: MythoMax L2 13b + en_US: MythoMax L2 13b +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml new file mode 100644 index 0000000000..e399f2edb1 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/phi-3-vision-128k-instruct +label: + zh_Hans: Phi3.5 Vision Instruct + en_US: Phi3.5 Vision Instruct +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml new file mode 100644 index 0000000000..bb4b6f994e --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml @@ -0,0 +1,45 @@ +model: accounts/yi-01-ai/models/yi-large +label: + zh_Hans: Yi-Large + en_US: Yi-Large +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '3' + output: '3' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml new file mode 100644 index 0000000000..bbc697e934 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml @@ -0,0 +1,39 @@ +model: gemini-1.5-flash-8b-exp-0827 +label: + en_US: Gemini 1.5 Flash 8B 0827 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml new file mode 100644 index 0000000000..c5695e5dda --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml @@ -0,0 +1,39 @@ +model: gemini-1.5-flash-exp-0827 +label: + en_US: Gemini 1.5 Flash 0827 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml new file mode 100644 index 0000000000..0a918e0d7b --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml @@ -0,0 +1,39 @@ +model: gemini-1.5-pro-exp-0801 +label: + en_US: Gemini 1.5 Pro 0801 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml new file mode 100644 index 0000000000..7452ce46e7 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml @@ -0,0 +1,39 @@ +model: gemini-1.5-pro-exp-0827 +label: + en_US: Gemini 1.5 Pro 0827 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml index d65dc02674..b3e1ecf3af 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml @@ -9,7 +9,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 1048576 + context_size: 2097152 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml index ca8600a534..f494984443 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml @@ -3,3 +3,4 @@ - hunyuan-standard-256k - hunyuan-pro - hunyuan-turbo +- hunyuan-vision diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml new file mode 100644 index 0000000000..9edc7f4710 --- /dev/null +++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml @@ -0,0 +1,39 @@ +model: hunyuan-vision +label: + zh_Hans: hunyuan-vision + en_US: hunyuan-vision +model_type: llm +features: + - agent-thought + - tool-call + - multi-tool-call + - stream-tool-call + - vision +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8000 + - name: enable_enhance + label: + zh_Hans: 功能增强 + en_US: Enable Enhancement + type: boolean + help: + zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。 + en_US: Allow the model to perform external search to enhance the generation results. 
+ required: false + default: true +pricing: + input: '0.018' + output: '0.018' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py index b57e5e1c2b..2014de8516 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py @@ -1,6 +1,7 @@ import json import logging from collections.abc import Generator +from typing import cast from tencentcloud.common import credential from tencentcloud.common.exception import TencentCloudSDKException @@ -11,9 +12,12 @@ from tencentcloud.hunyuan.v20230901 import hunyuan_client, models from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, + ImagePromptMessageContent, PromptMessage, + PromptMessageContentType, PromptMessageTool, SystemPromptMessage, + TextPromptMessageContent, ToolPromptMessage, UserPromptMessage, ) @@ -143,6 +147,25 @@ class HunyuanLargeLanguageModel(LargeLanguageModel): tool_execute_result = {"result": message.content} content = json.dumps(tool_execute_result, ensure_ascii=False) dict_list.append({"Role": message.role.value, "Content": content, "ToolCallId": message.tool_call_id}) + elif isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + dict_list.append({"Role": message.role.value, "Content": message.content}) + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = {"Type": "text", "Text": message_content.data} + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + sub_message_dict = { + "Type": "image_url", + "ImageUrl": {"Url": message_content.data}, + } + sub_messages.append(sub_message_dict) + dict_list.append({"Role": message.role.value, "Contents": sub_messages}) else: dict_list.append({"Role": message.role.value, "Content": message.content}) return dict_list diff --git a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml index 751003d71e..bdb06b7fff 100644 --- a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml @@ -1,3 +1,8 @@ +- pixtral-12b-2409 +- codestral-latest +- mistral-embed +- open-mistral-nemo +- open-codestral-mamba - open-mistral-7b - open-mixtral-8x7b - open-mixtral-8x22b diff --git a/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml new file mode 100644 index 0000000000..5f1260233f --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml @@ -0,0 +1,51 @@ +model: codestral-latest +label: + zh_Hans: codestral-latest + en_US: codestral-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: 
max_tokens + default: 1024 + min: 1 + max: 4096 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml new file mode 100644 index 0000000000..d759103d08 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml @@ -0,0 +1,51 @@ +model: mistral-embed +label: + zh_Hans: mistral-embed + en_US: mistral-embed +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 1024 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml new file mode 100644 index 0000000000..d7ffb9ea02 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml @@ -0,0 +1,51 @@ +model: open-codestral-mamba +label: + zh_Hans: open-codestral-mamba + en_US: open-codestral-mamba +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 256000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 16384 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. 
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml new file mode 100644 index 0000000000..dcda4fbce7 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml @@ -0,0 +1,51 @@ +model: open-mistral-nemo +label: + zh_Hans: open-mistral-nemo + en_US: open-mistral-nemo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml new file mode 100644 index 0000000000..0b002b49ca --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml @@ -0,0 +1,51 @@ +model: pixtral-12b-2409 +label: + zh_Hans: pixtral-12b-2409 + en_US: pixtral-12b-2409 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py index 1ed77a2ee8..ff732e6925 100644 --- a/api/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py @@ -472,12 +472,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name=DefaultParameterName.TEMPERATURE.value, use_template=DefaultParameterName.TEMPERATURE.value, - label=I18nObject(en_US="Temperature"), + label=I18nObject(en_US="Temperature", zh_Hans="温度"), type=ParameterType.FLOAT, help=I18nObject( en_US="The temperature of the model. 
" "Increasing the temperature will make the model answer " - "more creatively. (Default: 0.8)" + "more creatively. (Default: 0.8)", + zh_Hans="模型的温度。增加温度将使模型的回答更具创造性。(默认值:0.8)", ), default=0.1, min=0, @@ -486,12 +487,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name=DefaultParameterName.TOP_P.value, use_template=DefaultParameterName.TOP_P.value, - label=I18nObject(en_US="Top P"), + label=I18nObject(en_US="Top P", zh_Hans="Top P"), type=ParameterType.FLOAT, help=I18nObject( en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to " "more diverse text, while a lower value (e.g., 0.5) will generate more " - "focused and conservative text. (Default: 0.9)" + "focused and conservative text. (Default: 0.9)", + zh_Hans="与top-k一起工作。较高的值(例如,0.95)会导致生成更多样化的文本,而较低的值(例如,0.5)会生成更专注和保守的文本。(默认值:0.9)", ), default=0.9, min=0, @@ -499,12 +501,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ), ParameterRule( name="top_k", - label=I18nObject(en_US="Top K"), + label=I18nObject(en_US="Top K", zh_Hans="Top K"), type=ParameterType.INT, help=I18nObject( en_US="Reduces the probability of generating nonsense. " "A higher value (e.g. 100) will give more diverse answers, " - "while a lower value (e.g. 10) will be more conservative. (Default: 40)" + "while a lower value (e.g. 10) will be more conservative. (Default: 40)", + zh_Hans="减少生成无意义内容的可能性。较高的值(例如100)将提供更多样化的答案,而较低的值(例如10)将更为保守。(默认值:40)", ), min=1, max=100, @@ -516,7 +519,8 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject( en_US="Sets how strongly to penalize repetitions. " "A higher value (e.g., 1.5) will penalize repetitions more strongly, " - "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)" + "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)", + zh_Hans="设置对重复内容的惩罚强度。一个较高的值(例如,1.5)会更强地惩罚重复内容,而一个较低的值(例如,0.9)则会相对宽容。(默认值:1.1)", ), min=-2, max=2, @@ -524,11 +528,12 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name="num_predict", use_template="max_tokens", - label=I18nObject(en_US="Num Predict"), + label=I18nObject(en_US="Num Predict", zh_Hans="最大令牌数预测"), type=ParameterType.INT, help=I18nObject( en_US="Maximum number of tokens to predict when generating text. " - "(Default: 128, -1 = infinite generation, -2 = fill context)" + "(Default: 128, -1 = infinite generation, -2 = fill context)", + zh_Hans="生成文本时预测的最大令牌数。(默认值:128,-1 = 无限生成,-2 = 填充上下文)", ), default=(512 if int(credentials.get("max_tokens", 4096)) >= 768 else 128), min=-2, @@ -536,121 +541,137 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ), ParameterRule( name="mirostat", - label=I18nObject(en_US="Mirostat sampling"), + label=I18nObject(en_US="Mirostat sampling", zh_Hans="Mirostat 采样"), type=ParameterType.INT, help=I18nObject( en_US="Enable Mirostat sampling for controlling perplexity. " - "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" + "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", + zh_Hans="启用 Mirostat 采样以控制困惑度。" + "(默认值:0,0 = 禁用,1 = Mirostat,2 = Mirostat 2.0)", ), min=0, max=2, ), ParameterRule( name="mirostat_eta", - label=I18nObject(en_US="Mirostat Eta"), + label=I18nObject(en_US="Mirostat Eta", zh_Hans="学习率"), type=ParameterType.FLOAT, help=I18nObject( en_US="Influences how quickly the algorithm responds to feedback from " "the generated text. A lower learning rate will result in slower adjustments, " "while a higher learning rate will make the algorithm more responsive. 
" - "(Default: 0.1)" + "(Default: 0.1)", + zh_Hans="影响算法对生成文本反馈响应的速度。较低的学习率会导致调整速度变慢,而较高的学习率会使得算法更加灵敏。(默认值:0.1)", ), precision=1, ), ParameterRule( name="mirostat_tau", - label=I18nObject(en_US="Mirostat Tau"), + label=I18nObject(en_US="Mirostat Tau", zh_Hans="文本连贯度"), type=ParameterType.FLOAT, help=I18nObject( en_US="Controls the balance between coherence and diversity of the output. " - "A lower value will result in more focused and coherent text. (Default: 5.0)" + "A lower value will result in more focused and coherent text. (Default: 5.0)", + zh_Hans="控制输出的连贯性和多样性之间的平衡。较低的值会导致更专注和连贯的文本。(默认值:5.0)", ), precision=1, ), ParameterRule( name="num_ctx", - label=I18nObject(en_US="Size of context window"), + label=I18nObject(en_US="Size of context window", zh_Hans="上下文窗口大小"), type=ParameterType.INT, help=I18nObject( - en_US="Sets the size of the context window used to generate the next token. (Default: 2048)" + en_US="Sets the size of the context window used to generate the next token. (Default: 2048)", + zh_Hans="设置用于生成下一个标记的上下文窗口大小。(默认值:2048)", ), default=2048, min=1, ), ParameterRule( name="num_gpu", - label=I18nObject(en_US="GPU Layers"), + label=I18nObject(en_US="GPU Layers", zh_Hans="GPU 层数"), type=ParameterType.INT, help=I18nObject( en_US="The number of layers to offload to the GPU(s). " "On macOS it defaults to 1 to enable metal support, 0 to disable." "As long as a model fits into one gpu it stays in one. " - "It does not set the number of GPU(s). " + "It does not set the number of GPU(s). ", + zh_Hans="加载到 GPU 的层数。在 macOS 上,默认为 1 以启用 Metal 支持,设置为 0 则禁用。" + "只要模型适合一个 GPU,它就保留在其中。它不设置 GPU 的数量。", ), min=-1, default=1, ), ParameterRule( name="num_thread", - label=I18nObject(en_US="Num Thread"), + label=I18nObject(en_US="Num Thread", zh_Hans="线程数"), type=ParameterType.INT, help=I18nObject( en_US="Sets the number of threads to use during computation. " "By default, Ollama will detect this for optimal performance. " "It is recommended to set this value to the number of physical CPU cores " - "your system has (as opposed to the logical number of cores)." + "your system has (as opposed to the logical number of cores).", + zh_Hans="设置计算过程中使用的线程数。默认情况下,Ollama会检测以获得最佳性能。建议将此值设置为系统拥有的物理CPU核心数(而不是逻辑核心数)。", ), min=1, ), ParameterRule( name="repeat_last_n", - label=I18nObject(en_US="Repeat last N"), + label=I18nObject(en_US="Repeat last N", zh_Hans="回溯内容"), type=ParameterType.INT, help=I18nObject( en_US="Sets how far back for the model to look back to prevent repetition. " - "(Default: 64, 0 = disabled, -1 = num_ctx)" + "(Default: 64, 0 = disabled, -1 = num_ctx)", + zh_Hans="设置模型回溯多远的内容以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)", ), min=-1, ), ParameterRule( name="tfs_z", - label=I18nObject(en_US="TFS Z"), + label=I18nObject(en_US="TFS Z", zh_Hans="减少标记影响"), type=ParameterType.FLOAT, help=I18nObject( en_US="Tail free sampling is used to reduce the impact of less probable tokens " "from the output. A higher value (e.g., 2.0) will reduce the impact more, " - "while a value of 1.0 disables this setting. (default: 1)" + "while a value of 1.0 disables this setting. (default: 1)", + zh_Hans="用于减少输出中不太可能的标记的影响。较高的值(例如,2.0)会更多地减少这种影响,而1.0的值则会禁用此设置。(默认值:1)", ), precision=1, ), ParameterRule( name="seed", - label=I18nObject(en_US="Seed"), + label=I18nObject(en_US="Seed", zh_Hans="随机数种子"), type=ParameterType.INT, help=I18nObject( en_US="Sets the random number seed to use for generation. Setting this to " "a specific number will make the model generate the same text for " - "the same prompt. 
(Default: 0)" + "the same prompt. (Default: 0)", + zh_Hans="设置用于生成的随机数种子。将此设置为特定数字将使模型对相同的提示生成相同的文本。(默认值:0)", ), ), ParameterRule( name="keep_alive", - label=I18nObject(en_US="Keep Alive"), + label=I18nObject(en_US="Keep Alive", zh_Hans="模型存活时间"), type=ParameterType.STRING, help=I18nObject( en_US="Sets how long the model is kept in memory after generating a response. " "This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours)." " A negative number keeps the model loaded indefinitely, and '0' unloads the model" " immediately after generating a response." - " Valid time units are 's','m','h'. (Default: 5m)" + " Valid time units are 's','m','h'. (Default: 5m)", + zh_Hans="设置模型在生成响应后在内存中保留的时间。" + "这必须是一个带有单位的持续时间字符串(例如,'10m' 表示10分钟,'24h' 表示24小时)。" + "负数表示无限期地保留模型,'0'表示在生成响应后立即卸载模型。" + "有效的时间单位有 's'(秒)、'm'(分钟)、'h'(小时)。(默认值:5m)", ), ), ParameterRule( name="format", - label=I18nObject(en_US="Format"), + label=I18nObject(en_US="Format", zh_Hans="返回格式"), type=ParameterType.STRING, help=I18nObject( - en_US="the format to return a response in. Currently the only accepted value is json." + en_US="the format to return a response in. Currently the only accepted value is json.", + zh_Hans="返回响应的格式。目前唯一接受的值是json。", ), options=["json"], ), diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py index 5a8a754f72..c2ffe653c8 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py +++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py @@ -205,7 +205,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): parameter_rules=[ ParameterRule( name=DefaultParameterName.TEMPERATURE.value, - label=I18nObject(en_US="Temperature"), + label=I18nObject(en_US="Temperature", zh_Hans="温度"), + help=I18nObject( + en_US="Kernel sampling threshold. Used to determine the randomness of the results." + "The higher the value, the stronger the randomness." + "The higher the possibility of getting different answers to the same question.", + zh_Hans="核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。", + ), type=ParameterType.FLOAT, default=float(credentials.get("temperature", 0.7)), min=0, @@ -214,7 +220,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.TOP_P.value, - label=I18nObject(en_US="Top P"), + label=I18nObject(en_US="Top P", zh_Hans="Top P"), + help=I18nObject( + en_US="The probability threshold of the nucleus sampling method during the generation process." + "The larger the value is, the higher the randomness of generation will be." + "The smaller the value is, the higher the certainty of generation will be.", + zh_Hans="生成过程中核采样方法概率阈值。取值越大,生成的随机性越高;取值越小,生成的确定性越高。", + ), type=ParameterType.FLOAT, default=float(credentials.get("top_p", 1)), min=0, @@ -223,7 +235,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.FREQUENCY_PENALTY.value, - label=I18nObject(en_US="Frequency Penalty"), + label=I18nObject(en_US="Frequency Penalty", zh_Hans="频率惩罚"), + help=I18nObject( + en_US="For controlling the repetition rate of words used by the model." 
+ "Increasing this can reduce the repetition of the same words in the model's output.", + zh_Hans="用于控制模型已使用字词的重复率。 提高此项可以降低模型在输出中重复相同字词的重复度。", + ), type=ParameterType.FLOAT, default=float(credentials.get("frequency_penalty", 0)), min=-2, @@ -231,7 +248,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.PRESENCE_PENALTY.value, - label=I18nObject(en_US="Presence Penalty"), + label=I18nObject(en_US="Presence Penalty", zh_Hans="存在惩罚"), + help=I18nObject( + en_US="Used to control the repetition rate when generating models." + "Increasing this can reduce the repetition rate of model generation.", + zh_Hans="用于控制模型生成时的重复度。提高此项可以降低模型生成的重复度。", + ), type=ParameterType.FLOAT, default=float(credentials.get("presence_penalty", 0)), min=-2, @@ -239,7 +261,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.MAX_TOKENS.value, - label=I18nObject(en_US="Max Tokens"), + label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), + help=I18nObject( + en_US="Maximum length of tokens for the model response.", zh_Hans="模型回答的tokens的最大长度。" + ), type=ParameterType.INT, default=512, min=1, diff --git a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml index 7e00dd3f4b..d9497b76b8 100644 --- a/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/openrouter/llm/_position.yaml @@ -1,3 +1,5 @@ +- openai/o1-preview +- openai/o1-mini - openai/gpt-4o - openai/gpt-4o-mini - openai/gpt-4 diff --git a/api/core/model_runtime/model_providers/openrouter/llm/llm.py b/api/core/model_runtime/model_providers/openrouter/llm/llm.py index b6bb249a04..736ab8e7a8 100644 --- a/api/core/model_runtime/model_providers/openrouter/llm/llm.py +++ b/api/core/model_runtime/model_providers/openrouter/llm/llm.py @@ -1,7 +1,7 @@ from collections.abc import Generator from typing import Optional, Union -from core.model_runtime.entities.llm_entities import LLMResult +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool from core.model_runtime.entities.model_entities import AIModelEntity from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel @@ -26,7 +26,7 @@ class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel): ) -> Union[LLMResult, Generator]: self._update_credential(model, credentials) - return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user) + return self._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user) def validate_credentials(self, model: str, credentials: dict) -> None: self._update_credential(model, credentials) @@ -46,7 +46,48 @@ class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel): ) -> Union[LLMResult, Generator]: self._update_credential(model, credentials) - return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user) + block_as_stream = False + if model.startswith("openai/o1"): + block_as_stream = True + stop = None + + # invoke block as stream + if stream and block_as_stream: + return self._generate_block_as_stream( + model, credentials, prompt_messages, model_parameters, tools, stop, user + ) + 
else: + return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user) + + def _generate_block_as_stream( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + user: Optional[str] = None, + ) -> Generator: + resp: LLMResult = super()._generate( + model, credentials, prompt_messages, model_parameters, tools, stop, False, user + ) + + yield LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=0, + message=resp.message, + usage=self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=resp.usage.prompt_tokens, + completion_tokens=resp.usage.completion_tokens, + ), + finish_reason="stop", + ), + ) def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity: self._update_credential(model, credentials) diff --git a/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml b/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml new file mode 100644 index 0000000000..85a918ff5e --- /dev/null +++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-mini.yaml @@ -0,0 +1,40 @@ +model: openai/o1-mini +label: + en_US: o1-mini +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 65536 + - name: response_format + label: + zh_Hans: 回复格式 + en_US: response_format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "3.00" + output: "12.00" + unit: "0.000001" + currency: USD diff --git a/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml b/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml new file mode 100644 index 0000000000..74b0a511be --- /dev/null +++ b/api/core/model_runtime/model_providers/openrouter/llm/o1-preview.yaml @@ -0,0 +1,40 @@ +model: openai/o1-preview +label: + en_US: o1-preview +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 32768 + - name: response_format + label: + zh_Hans: 回复格式 + en_US: response_format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "15.00" + output: "60.00" + unit: "0.000001" + currency: USD diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml index 87712874b9..bf91468fcf 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml @@ -59,3 
+59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml index f16f3de60b..781b837e8e 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml index 21267c240b..67210e9020 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml index 80c7ec40f2..482632ff06 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml index 841dd97f35..ddb6fd977c 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml index 33d5d12b22..024c79dbcf 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml new file mode 100644 index 0000000000..94f661f40d --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml @@ -0,0 +1,61 @@ +model: Qwen2-72B-Instruct-AWQ-int4 +label: + en_US: Qwen2-72B-Instruct-AWQ-int4 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml index 62255cc7d2..a06f8d5ab1 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml @@ -61,3 +61,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml new file mode 100644 index 0000000000..4369411399 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml @@ -0,0 +1,63 @@ +model: Qwen2-7B-Instruct +label: + en_US: Qwen2-7B-Instruct +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml index 2f3f1f0225..d549ecd227 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml @@ -61,3 +61,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml new file mode 100644 index 0000000000..15cbf01f1f --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml @@ -0,0 +1,61 @@ +model: Qwen2.5-72B-Instruct +label: + en_US: Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 30720 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. 
For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml new file mode 100644 index 0000000000..dadc8f8f32 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml @@ -0,0 +1,61 @@ +model: Qwen2.5-7B-Instruct +label: + en_US: Qwen2.5-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml new file mode 100644 index 0000000000..649be20b48 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml @@ -0,0 +1,61 @@ +model: Reflection-Llama-3.1-70B +label: + en_US: Reflection-Llama-3.1-70B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 10240 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml new file mode 100644 index 0000000000..92eae6804f --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml @@ -0,0 +1,61 @@ +model: Yi-1_5-9B-Chat-16K +label: + en_US: Yi-1_5-9B-Chat-16K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 16384 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml new file mode 100644 index 0000000000..0e21ce148c --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml @@ -0,0 +1,61 @@ +model: Yi-Coder-1.5B-Chat +label: + en_US: Yi-Coder-1.5B-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 20480 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml new file mode 100644 index 0000000000..23b0841ce4 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml @@ -0,0 +1,61 @@ +model: Yi-Coder-9B-Chat +label: + en_US: Yi-Coder-9B-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 20480 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml index 2c9eac0e49..37bf400f1e 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml @@ -1,15 +1,24 @@ -- Meta-Llama-3.1-405B-Instruct-AWQ-INT4 -- Meta-Llama-3.1-8B-Instruct -- Meta-Llama-3-70B-Instruct-GPTQ-Int4 -- Meta-Llama-3-8B-Instruct -- Qwen2-72B-Instruct-GPTQ-Int4 +- Qwen2.5-72B-Instruct +- Qwen2.5-7B-Instruct +- Yi-Coder-1.5B-Chat +- Yi-Coder-9B-Chat +- Qwen2-72B-Instruct-AWQ-int4 +- Yi-1_5-9B-Chat-16K +- Qwen2-7B-Instruct +- Reflection-Llama-3.1-70B - Qwen2-72B-Instruct +- Meta-Llama-3.1-8B-Instruct + +- Meta-Llama-3.1-405B-Instruct-AWQ-INT4 +- Meta-Llama-3-70B-Instruct-GPTQ-Int4 +- chatglm3-6b +- Meta-Llama-3-8B-Instruct +- Llama3-Chinese_v2 +- deepseek-v2-lite-chat +- Qwen2-72B-Instruct-GPTQ-Int4 - Qwen2-7B - Qwen-14B-Chat-Int4 - Qwen1.5-72B-Chat-GPTQ-Int4 - Qwen1.5-7B - Qwen1.5-110B-Chat-GPTQ-Int4 - deepseek-v2-chat -- deepseek-v2-lite-chat -- Llama3-Chinese_v2 -- chatglm3-6b diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml index f9c26b7f90..75d80f784a 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml index 078922ef95..fa9a7b7175 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml index 4ff3af7b51..75a26d2505 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py index 450d22fb75..9a4ead031d 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py +++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py @@ -1,7 +1,5 @@ import logging -from core.model_runtime.entities.model_entities import ModelType -from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.__base.model_provider import ModelProvider logger = 
logging.getLogger(__name__)
@@ -9,20 +7,4 @@ logger = logging.getLogger(__name__)

 class PerfXCloudProvider(ModelProvider):
     def validate_provider_credentials(self, credentials: dict) -> None:
-        """
-        Validate provider credentials
-        if validate failed, raise exception
-
-        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
-        """
-        try:
-            model_instance = self.get_model_instance(ModelType.LLM)
-
-            # Use `Qwen2_72B_Chat_GPTQ_Int4` model for validate,
-            # no matter what model you pass in, text completion model or chat model
-            model_instance.validate_credentials(model="Qwen2-72B-Instruct-GPTQ-Int4", credentials=credentials)
-        except CredentialsValidateFailedError as ex:
-            raise ex
-        except Exception as ex:
-            logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
-            raise ex
+        pass
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
new file mode 100644
index 0000000000..03db0d8bce
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
@@ -0,0 +1,4 @@
+model: gte-Qwen2-7B-instruct
+model_type: text-embedding
+model_properties:
+  context_size: 2048
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index c2f0eb0536..43db4aed11 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,3 +1,7 @@
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-72B-Instruct
 - Qwen/Qwen2-72B-Instruct
 - Qwen/Qwen2-57B-A14B-Instruct
 - Qwen/Qwen2-7B-Instruct
@@ -6,6 +10,7 @@
 - 01-ai/Yi-1.5-9B-Chat-16K
 - 01-ai/Yi-1.5-6B-Chat
 - THUDM/glm-4-9b-chat
+- deepseek-ai/DeepSeek-V2.5
 - deepseek-ai/DeepSeek-V2-Chat
 - deepseek-ai/DeepSeek-Coder-V2-Instruct
 - internlm/internlm2_5-7b-chat
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
new file mode 100644
index 0000000000..1c8e15ae52
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
@@ -0,0 +1,30 @@
+model: deepseek-ai/DeepSeek-V2.5
+label:
+  en_US: deepseek-ai/DeepSeek-V2.5
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.33' + output: '1.33' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml new file mode 100644 index 0000000000..02a401464b --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-14b-instruct.yaml @@ -0,0 +1,30 @@ +model: Qwen/Qwen2.5-14B-Instruct +label: + en_US: Qwen/Qwen2.5-14B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0.7' + output: '0.7' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml new file mode 100644 index 0000000000..d084617e7d --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-32b-instruct.yaml @@ -0,0 +1,30 @@ +model: Qwen/Qwen2.5-32B-Instruct +label: + en_US: Qwen/Qwen2.5-32B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml new file mode 100644 index 0000000000..dfbad2494c --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml @@ -0,0 +1,30 @@ +model: Qwen/Qwen2.5-72B-Instruct +label: + en_US: Qwen/Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. 
+ - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml new file mode 100644 index 0000000000..cdc8ffc4d2 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-7b-instruct.yaml @@ -0,0 +1,30 @@ +model: Qwen/Qwen2.5-7B-Instruct +label: + en_US: Qwen/Qwen2.5-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml new file mode 100644 index 0000000000..8ce336d60c --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml @@ -0,0 +1,51 @@ +- qwen-vl-max-0809 +- qwen-vl-max-0201 +- qwen-vl-max +- qwen-max-latest +- qwen-max-1201 +- qwen-max-0919 +- qwen-max-0428 +- qwen-max-0403 +- qwen-max-0107 +- qwen-max +- qwen-max-longcontext +- qwen-plus-latest +- qwen-plus-0919 +- qwen-plus-0806 +- qwen-plus-0723 +- qwen-plus-0624 +- qwen-plus-0206 +- qwen-plus-chat +- qwen-plus +- qwen-vl-plus-0809 +- qwen-vl-plus-0201 +- qwen-vl-plus +- qwen-turbo-latest +- qwen-turbo-0919 +- qwen-turbo-0624 +- qwen-turbo-0206 +- qwen-turbo-chat +- qwen-turbo +- qwen2.5-72b-instruct +- qwen2.5-32b-instruct +- qwen2.5-14b-instruct +- qwen2.5-7b-instruct +- qwen2.5-3b-instruct +- qwen2.5-1.5b-instruct +- qwen2.5-0.5b-instruct +- qwen2.5-coder-7b-instruct +- qwen2-math-72b-instruct +- qwen2-math-7b-instruct +- qwen2-math-1.5b-instruct +- qwen-long +- qwen-math-plus-latest +- qwen-math-plus-0919 +- qwen-math-plus-0816 +- qwen-math-plus +- qwen-math-turbo-latest +- qwen-math-turbo-0919 +- qwen-math-turbo +- qwen-coder-turbo-latest +- qwen-coder-turbo-0919 +- qwen-coder-turbo +- farui-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py index 1d4eba6668..f90c7f075f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py +++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py @@ -30,7 +30,15 @@ from core.model_runtime.entities.message_entities import ( ToolPromptMessage, UserPromptMessage, ) -from core.model_runtime.entities.model_entities import ModelFeature +from core.model_runtime.entities.model_entities import ( + AIModelEntity, + FetchFrom, + I18nObject, + ModelFeature, + ModelType, + ParameterRule, + ParameterType, +) from core.model_runtime.errors.invoke import ( InvokeAuthorizationError, InvokeBadRequestError, @@ -520,3 +528,64 @@ class TongyiLargeLanguageModel(LargeLanguageModel): UnsupportedHTTPMethod, ], } + + def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None: + """ + Architecture for defining customizable 
models + + :param model: model name + :param credentials: model credentials + :return: AIModelEntity or None + """ + rules = [ + ParameterRule( + name="temperature", + type=ParameterType.FLOAT, + use_template="temperature", + label=I18nObject(zh_Hans="温度", en_US="Temperature"), + ), + ParameterRule( + name="top_p", + type=ParameterType.FLOAT, + use_template="top_p", + label=I18nObject(zh_Hans="Top P", en_US="Top P"), + ), + ParameterRule( + name="top_k", + type=ParameterType.INT, + min=0, + max=99, + label=I18nObject(zh_Hans="top_k", en_US="top_k"), + ), + ParameterRule( + name="max_tokens", + type=ParameterType.INT, + min=1, + max=128000, + default=1024, + label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"), + ), + ParameterRule( + name="seed", + type=ParameterType.INT, + default=1234, + label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"), + ), + ParameterRule( + name="repetition_penalty", + type=ParameterType.FLOAT, + default=1.1, + label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"), + ), + ] + + entity = AIModelEntity( + model=model, + label=I18nObject(en_US=model), + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_type=ModelType.LLM, + model_properties={}, + parameter_rules=rules, + ) + + return entity diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml new file mode 100644 index 0000000000..ebba565d57 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo-0919 +label: + en_US: qwen-coder-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml new file mode 100644 index 0000000000..361e2c2373 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo-latest +label: + en_US: qwen-coder-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml new file mode 100644 index 0000000000..f4032a4dd3 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo +label: + en_US: qwen-coder-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml index 33b3435eb6..dbe7d024a5 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml @@ -1,3 +1,4 @@ +# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6 model: qwen-long label: en_US: qwen-long diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml new file mode 100644 index 0000000000..89d1302abe --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-0816 +label: + en_US: qwen-math-plus-0816 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml new file mode 100644 index 0000000000..032b3c970d --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-0919 +label: + en_US: qwen-math-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml new file mode 100644 index 0000000000..31dd9f6972 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-latest +label: + en_US: qwen-math-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml new file mode 100644 index 0000000000..1a51d57f78 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus +label: + en_US: qwen-math-plus +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml new file mode 100644 index 0000000000..1894eea417 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo-0919 +label: + en_US: qwen-math-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml new file mode 100644 index 0000000000..b8365618b0 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo-latest +label: + en_US: qwen-math-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml new file mode 100644 index 0000000000..8d346d691e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo +label: + en_US: qwen-math-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml new file mode 100644 index 0000000000..c0ad12b85e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml @@ -0,0 +1,81 @@ +model: qwen-max-0107 +label: + en_US: qwen-max-0107 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml index 935a16ebcb..b00fb44d29 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml index c39799a71f..1848dcc07d 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml new file mode 100644 index 0000000000..238882bb12 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml @@ -0,0 +1,81 @@ +model: qwen-max-0919 +label: + en_US: qwen-max-0919 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 0368a4a01e..dc234783cd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -79,3 +79,4 @@ pricing: output: '0.12' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml new file mode 100644 index 0000000000..9d7d3c2fcb --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml @@ -0,0 +1,81 @@ +model: qwen-max-latest +label: + en_US: qwen-max-latest +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index 1c705670ca..a7bdc42f73 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 32768 + context_size: 32000 parameter_rules: - name: temperature use_template: temperature @@ -22,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8000 min: 1 - max: 2000 + max: 8000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index 64094effbb..57888406af 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.04' - output: '0.12' + input: '0.02' + output: '0.06' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml new file mode 100644 index 0000000000..1e0b816617 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0206 +label: + en_US: qwen-plus-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml new file mode 100644 index 0000000000..f70c373922 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0624 +label: + en_US: qwen-plus-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml new file mode 100644 index 0000000000..c6007e9164 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0723 +label: + en_US: qwen-plus-0723 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml new file mode 100644 index 0000000000..2f53c43336 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0806 +label: + en_US: qwen-plus-0806 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
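The seed help above promises best-effort reproducibility rather than bit-exact repeats; in practice the value is simply forwarded with the request. Below is a minimal sketch assuming the DashScope Python SDK's `Generation.call` interface and a `DASHSCOPE_API_KEY` in the environment; it is illustrative only, not Dify's own invocation path.

```python
# Illustrative sketch only: assumes `pip install dashscope` and DASHSCOPE_API_KEY is set.
from dashscope import Generation

messages = [{"role": "user", "content": "Name three colors."}]

# Re-sending the same seed (1234, the default declared in the YAML above) should make
# repeated calls return the same or similar text, per the best-effort note in the help.
for _ in range(2):
    rsp = Generation.call(
        model="qwen-plus-0723",
        messages=messages,
        result_format="message",
        seed=1234,
        temperature=0.3,
    )
    print(rsp.output.choices[0].message.content)
```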
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml new file mode 100644 index 0000000000..90b54ca52e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0919 +label: + en_US: qwen-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
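The enable_search rule above is a plain boolean that asks the provider to consult its built-in web search; as the help text notes, the model still decides on its own whether to use the results. A short sketch under the same DashScope SDK assumption as the seed example:

```python
from dashscope import Generation  # same SDK assumption as the seed sketch above

rsp = Generation.call(
    model="qwen-plus-0919",
    messages=[{"role": "user", "content": "Summarize this week's Qwen model releases."}],
    result_format="message",
    enable_search=True,  # ask the backend to consult web search; the model may still ignore it
)
print(rsp.output.choices[0].message.content)
```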
+ - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml index bc848072ed..59e8851240 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml @@ -79,3 +79,4 @@ pricing: output: '0.012' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml new file mode 100644 index 0000000000..2a821dbcfe --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-latest +label: + en_US: qwen-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
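The top_p help repeated across these files describes nucleus sampling: keep the smallest set of highest-probability tokens whose probabilities sum to at least the threshold. A toy, self-contained illustration of that rule (the numbers are invented for the example, not taken from any model):

```python
# Toy illustration of the nucleus (top_p) rule described in the help text.
def nucleus(probs: dict[str, float], top_p: float = 0.8) -> list[str]:
    kept, total = [], 0.0
    for token, p in sorted(probs.items(), key=lambda kv: kv[1], reverse=True):
        kept.append(token)
        total += p
        if total >= top_p:
            break
    return kept

print(nucleus({"cat": 0.5, "dog": 0.3, "fish": 0.15, "axolotl": 0.05}))
# -> ['cat', 'dog']  (0.5 + 0.3 >= 0.8, so lower-probability tokens are dropped)
```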
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index 4be78627f0..626884f4b2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -3,10 +3,12 @@ label: en_US: qwen-plus model_type: llm features: + - multi-tool-call - agent-thought + - stream-tool-call model_properties: - mode: completion - context_size: 32768 + mode: chat + context_size: 131072 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8192 min: 1 - max: 2000 + max: 8192 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
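The qwen-plus hunk above raises context_size to 131072 and max_tokens to 8192. A caller that wants the full completion budget has to keep the prompt inside context_size minus max_tokens; the sketch below uses a crude 4-characters-per-token estimate as a stand-in for the real tokenizer.

```python
# Rough prompt-budget check for the updated qwen-plus limits in this diff.
CONTEXT_SIZE = 131072   # context_size from qwen-plus.yaml after this change
MAX_TOKENS = 8192       # max_tokens default/max after this change

def rough_tokens(text: str) -> int:
    return max(1, len(text) // 4)  # crude estimate, not the provider's tokenizer

def fits(prompt: str, completion_budget: int = MAX_TOKENS) -> bool:
    return rough_tokens(prompt) + completion_budget <= CONTEXT_SIZE

print(fits("hello " * 1000))  # True: roughly 1,500 estimated prompt tokens plus 8,192 fits easily
```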
@@ -73,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.004' - output: '0.012' + input: '0.0008' + output: '0.002' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml new file mode 100644 index 0000000000..844fced77a --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0206 +label: + en_US: qwen-turbo-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml new file mode 100644 index 0000000000..0152f75579 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0624 +label: + en_US: qwen-turbo-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml new file mode 100644 index 0000000000..19c6c8d293 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0919 +label: + en_US: qwen-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0003' + output: '0.0006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml index f1950577ec..f557f311ef 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml @@ -79,3 +79,4 @@ pricing: output: '0.006' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml new file mode 100644 index 0000000000..be2475847e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-latest +label: + en_US: qwen-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
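Besides the new snapshots, this diff tags the old qwen-plus-chat and qwen-turbo-chat entries with `deprecated: true`. One plausible use of such a flag is to hide those YAMLs when building a selectable model list; the sketch below is illustrative only and is not Dify's actual loader.

```python
# Sketch of skipping YAMLs marked `deprecated: true` (assumes PyYAML).
from pathlib import Path
import yaml

llm_dir = Path("api/core/model_runtime/model_providers/tongyi/llm")

active_models = []
for path in sorted(llm_dir.glob("*.yaml")):
    spec = yaml.safe_load(path.read_text(encoding="utf-8"))
    if spec.get("deprecated"):
        continue  # hide deprecated snapshots such as qwen-plus-chat and qwen-turbo-chat
    active_models.append(spec["model"])

print(active_models)
```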
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0006' + output: '0.0003' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index d4c03100ec..90f13dc19f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -3,10 +3,12 @@ label: en_US: qwen-turbo model_type: llm features: + - multi-tool-call - agent-thought + - stream-tool-call model_properties: - mode: completion - context_size: 8192 + mode: chat + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 1500 + default: 2000 min: 1 - max: 1500 + max: 2000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
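The pricing blocks in these files share one convention: `unit: '0.001'` means the input and output figures are RMB per 1,000 tokens. A quick cost calculation using the qwen-turbo-latest figures exactly as declared in this diff:

```python
from decimal import Decimal

# price = tokens * unit * rate; unit '0.001' means the rates are per 1,000 tokens (currency RMB).
def cost(tokens: int, rate: str, unit: str = "0.001") -> Decimal:
    return Decimal(tokens) * Decimal(unit) * Decimal(rate)

# qwen-turbo-latest as declared above: input '0.0006', output '0.0003'.
total = cost(12_000, "0.0006") + cost(800, "0.0003")
print(f"{total.normalize()} RMB")  # 0.0072 + 0.00024 = 0.00744 RMB
```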
@@ -73,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.002' - output: '0.006' + input: '0.0006' + output: '0.0003' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml new file mode 100644 index 0000000000..63b6074d0d --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml @@ -0,0 +1,48 @@ +model: qwen-vl-max-0201 +label: + en_US: qwen-vl-max-0201 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml new file mode 100644 index 0000000000..41d45966e9 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml @@ -0,0 +1,57 @@ +model: qwen-vl-max-0809 +label: + en_US: qwen-vl-max-0809 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. 
The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml index f917ccaa5d..78d0509374 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 8192 + context_size: 32000 parameter_rules: - name: top_p use_template: top_p @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
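The qwen-vl-* entries add a max_tokens rule and revised context sizes for the vision models. For these, the user message mixes image and text parts rather than a single string; below is a minimal sketch assuming the DashScope SDK's `MultiModalConversation` interface, with a placeholder image URL and `DASHSCOPE_API_KEY` in the environment.

```python
# Illustrative only: assumes the DashScope SDK's MultiModalConversation interface;
# the image URL is a placeholder, not a real asset.
from dashscope import MultiModalConversation

messages = [{
    "role": "user",
    "content": [
        {"image": "https://example.com/receipt.png"},
        {"text": "What is the total amount on this receipt?"},
    ],
}]

rsp = MultiModalConversation.call(model="qwen-vl-max-0809", messages=messages, max_tokens=2000)
print(rsp.output)
```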
- name: seed required: false type: int diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml new file mode 100644 index 0000000000..8944388b1e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml @@ -0,0 +1,57 @@ +model: qwen-vl-plus-0201 +label: + en_US: qwen-vl-plus-0201 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml new file mode 100644 index 0000000000..869e0ea71c --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml @@ -0,0 +1,57 @@ +model: qwen-vl-plus-0809 +label: + en_US: qwen-vl-plus-0809 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.008' + output: '0.008' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml index e2dd8c4e57..da11bacc64 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 32768 + context_size: 8000 parameter_rules: - name: top_p use_template: top_p @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: seed required: false type: int diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml new file mode 100644 index 0000000000..cfe4b5a666 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-1.5b-instruct +label: + en_US: qwen2-math-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml new file mode 100644 index 0000000000..e541c197b0 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-72b-instruct +label: + en_US: qwen2-math-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml new file mode 100644 index 0000000000..ba4514e3d6 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-7b-instruct +label: + en_US: qwen2-math-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml new file mode 100644 index 0000000000..e5596041af --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-0.5b-instruct +label: + en_US: qwen2.5-0.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml new file mode 100644 index 0000000000..4004c59417 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-1.5b-instruct +label: + en_US: qwen2.5-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml new file mode 100644 index 0000000000..d8f53666ce --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-14b-instruct +label: + en_US: qwen2.5-14b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml new file mode 100644 index 0000000000..890f7e6e4e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-32b-instruct +label: + en_US: qwen2.5-32b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0035' + output: '0.007' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml new file mode 100644 index 0000000000..6d3d2dd5bb --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-3b-instruct +label: + en_US: qwen2.5-3b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml new file mode 100644 index 0000000000..17d0eb5b35 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-72b-instruct +label: + en_US: qwen2.5-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml new file mode 100644 index 0000000000..435b3f90a2 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-7b-instruct +label: + en_US: qwen2.5-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.001'
+  output: '0.002'
+  unit: '0.001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
new file mode 100644
index 0000000000..435b3f90a2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -0,0 +1,79 @@
+model: qwen2.5-coder-7b-instruct
+label:
+  en_US: qwen2.5-coder-7b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 8192
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index b251391e34..fabe6d90e6 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -11,15 +11,16 @@ background: "#EFF1FE" help: title: en_US: Get your API key from AliCloud - zh_Hans: 从阿里云获取 API Key + zh_Hans: 从阿里云百炼获取 API Key url: - en_US: https://dashscope.console.aliyun.com/api-key_management + en_US: https://bailian.console.aliyun.com/?apiKey=1#/api-key supported_model_types: - llm - tts - text-embedding configurate_methods: - predefined-model + - customizable-model provider_credential_schema: credential_form_schemas: - variable: dashscope_api_key @@ -30,3 +31,20 @@ provider_credential_schema: placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key +model_credential_schema: + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter full model name + zh_Hans: 输入模型全称 + credential_form_schemas: + - variable: dashscope_api_key + required: true + label: + en_US: API Key + type: secret-input + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py index 4fadda5df5..286640079b 100644 --- a/api/core/model_runtime/model_providers/xinference/llm/llm.py +++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py @@ -19,7 +19,6 @@ from openai.types.chat.chat_completion_message import FunctionCall from openai.types.completion import Completion from xinference_client.client.restful.restful_client import ( Client, - RESTfulChatglmCppChatModelHandle, RESTfulChatModelHandle, RESTfulGenerateModelHandle, ) @@ -491,7 +490,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): if tools and len(tools) > 0: generate_config["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools] vision = credentials.get("support_vision", False) - if isinstance(xinference_model, RESTfulChatModelHandle | RESTfulChatglmCppChatModelHandle): + if isinstance(xinference_model, RESTfulChatModelHandle): resp = client.chat.completions.create( model=credentials["model_uid"], 
messages=[self._convert_prompt_message_to_dict(message) for message in prompt_messages], diff --git a/api/core/model_runtime/model_providers/xinference/tts/tts.py b/api/core/model_runtime/model_providers/xinference/tts/tts.py index 10538b5788..81dbe397d2 100644 --- a/api/core/model_runtime/model_providers/xinference/tts/tts.py +++ b/api/core/model_runtime/model_providers/xinference/tts/tts.py @@ -208,21 +208,21 @@ class XinferenceText2SpeechModel(TTSModel): executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences))) futures = [ executor.submit( - handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=False + handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=True ) for i in range(len(sentences)) ] for future in futures: response = future.result() - for i in range(0, len(response), 1024): - yield response[i : i + 1024] + for chunk in response: + yield chunk else: response = handle.speech( - input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=False + input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=True ) - for i in range(0, len(response), 1024): - yield response[i : i + 1024] + for chunk in response: + yield chunk except Exception as ex: raise InvokeBadRequestError(str(ex)) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py index bf9b093cb3..fc71d64714 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py @@ -1,7 +1,8 @@ from .__version__ import __version__ from ._client import ZhipuAI -from .core._errors import ( +from .core import ( APIAuthenticationError, + APIConnectionError, APIInternalError, APIReachLimitError, APIRequestFailedError, diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py index 659f38d7ff..51f8c49ecb 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py @@ -1 +1 @@ -__version__ = "v2.0.1" +__version__ = "v2.1.0" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py index df9e506095..705d371e62 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py @@ -9,15 +9,13 @@ from httpx import Timeout from typing_extensions import override from . 
import api_resource -from .core import _jwt_token -from .core._base_type import NOT_GIVEN, NotGiven -from .core._errors import ZhipuAIError -from .core._http_client import ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient +from .core import NOT_GIVEN, ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient, NotGiven, ZhipuAIError, _jwt_token class ZhipuAI(HttpClient): - chat: api_resource.chat + chat: api_resource.chat.Chat api_key: str + _disable_token_cache: bool = True def __init__( self, @@ -28,10 +26,15 @@ class ZhipuAI(HttpClient): max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, + disable_token_cache: bool = True, + _strict_response_validation: bool = False, ) -> None: if api_key is None: - raise ZhipuAIError("No api_key provided, please provide it through parameters or environment variables") + api_key = os.environ.get("ZHIPUAI_API_KEY") + if api_key is None: + raise ZhipuAIError("未提供api_key,请通过参数或环境变量提供") self.api_key = api_key + self._disable_token_cache = disable_token_cache if base_url is None: base_url = os.environ.get("ZHIPUAI_BASE_URL") @@ -42,21 +45,31 @@ class ZhipuAI(HttpClient): super().__init__( version=__version__, base_url=base_url, + max_retries=max_retries, timeout=timeout, custom_httpx_client=http_client, custom_headers=custom_headers, + _strict_response_validation=_strict_response_validation, ) self.chat = api_resource.chat.Chat(self) self.images = api_resource.images.Images(self) self.embeddings = api_resource.embeddings.Embeddings(self) self.files = api_resource.files.Files(self) self.fine_tuning = api_resource.fine_tuning.FineTuning(self) + self.batches = api_resource.Batches(self) + self.knowledge = api_resource.Knowledge(self) + self.tools = api_resource.Tools(self) + self.videos = api_resource.Videos(self) + self.assistant = api_resource.Assistant(self) @property @override - def _auth_headers(self) -> dict[str, str]: + def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"{_jwt_token.generate_token(api_key)}"} + if self._disable_token_cache: + return {"Authorization": f"Bearer {api_key}"} + else: + return {"Authorization": f"Bearer {_jwt_token.generate_token(api_key)}"} def __del__(self) -> None: if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close") or not hasattr(self, "_client"): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py index 0a90e21e48..4fe0719dde 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py @@ -1,5 +1,34 @@ -from .chat import chat +from .assistant import ( + Assistant, +) +from .batches import Batches +from .chat import ( + AsyncCompletions, + Chat, + Completions, +) from .embeddings import Embeddings -from .files import Files -from .fine_tuning import fine_tuning +from .files import Files, FilesWithRawResponse +from .fine_tuning import FineTuning from .images import Images +from .knowledge import Knowledge +from .tools import Tools +from .videos import ( + Videos, +) + +__all__ = [ + "Videos", + "AsyncCompletions", + "Chat", + "Completions", + "Images", + "Embeddings", + "Files", + "FilesWithRawResponse", + "FineTuning", + "Batches", + "Knowledge", + "Tools", + "Assistant", +] diff --git 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py new file mode 100644 index 0000000000..ce619aa7f0 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py @@ -0,0 +1,3 @@ +from .assistant import Assistant + +__all__ = ["Assistant"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py new file mode 100644 index 0000000000..f772340a82 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.assistant import AssistantCompletion +from ...types.assistant.assistant_conversation_resp import ConversationUsageListResp +from ...types.assistant.assistant_support_resp import AssistantSupportResp + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +from ...types.assistant import assistant_conversation_params, assistant_create_params + +__all__ = ["Assistant"] + + +class Assistant(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def conversation( + self, + assistant_id: str, + model: str, + messages: list[assistant_create_params.ConversationMessage], + *, + stream: bool = True, + conversation_id: Optional[str] = None, + attachments: Optional[list[assistant_create_params.AssistantAttachments]] = None, + metadata: dict | None = None, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> StreamResponse[AssistantCompletion]: + body = deepcopy_minimal( + { + "assistant_id": assistant_id, + "model": model, + "messages": messages, + "stream": stream, + "conversation_id": conversation_id, + "attachments": attachments, + "metadata": metadata, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant", + body=maybe_transform(body, assistant_create_params.AssistantParameters), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=AssistantCompletion, + stream=stream or True, + stream_cls=StreamResponse[AssistantCompletion], + ) + + def query_support( + self, + *, + assistant_id_list: list[str] = None, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantSupportResp: + body = deepcopy_minimal( + { + "assistant_id_list": assistant_id_list, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant/list", + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=AssistantSupportResp, + ) + + def query_conversation_usage( + self, + assistant_id: str, + page: int = 1, + page_size: int = 10, + *, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None 
= None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ConversationUsageListResp: + body = deepcopy_minimal( + { + "assistant_id": assistant_id, + "page": page, + "page_size": page_size, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant/conversation/list", + body=maybe_transform(body, assistant_conversation_params.ConversationParameters), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=ConversationUsageListResp, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py new file mode 100644 index 0000000000..ae2f2be85e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, Optional + +import httpx + +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options, maybe_transform +from ..core.pagination import SyncCursorPage +from ..types import batch_create_params, batch_list_params +from ..types.batch import Batch + +if TYPE_CHECKING: + from .._client import ZhipuAI + + +class Batches(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def create( + self, + *, + completion_window: str | None = None, + endpoint: Literal["/v1/chat/completions", "/v1/embeddings"], + input_file_id: str, + metadata: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + auto_delete_input_file: bool = True, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + "auto_delete_input_file": auto_delete_input_file, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._get( + f"/batches/{batch_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. 
+ + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. + + Args: + batch_id: The ID of the batch to cancel. + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py index e69de29bb2..5cd8dc6f33 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py @@ -0,0 +1,5 @@ +from .async_completions import AsyncCompletions +from .chat import Chat +from .completions import Completions + +__all__ = ["AsyncCompletions", "Chat", "Completions"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py index 1f80119739..d8ecc31064 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py @@ -1,13 +1,25 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING, Literal, Optional, Union import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + drop_prefix_image_data, + make_request_options, + maybe_transform, +) from ...types.chat.async_chat_completion import AsyncCompletion, AsyncTaskStatus +from ...types.chat.code_geex import code_geex_params +from ...types.sensitive_word_check import SensitiveWordCheckRequest + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from ..._client import ZhipuAI @@ -22,6 +34,7 @@ class AsyncCompletions(BaseAPI): *, model: str, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -29,50 +42,74 @@ 
class AsyncCompletions(BaseAPI): seed: int | NotGiven = NOT_GIVEN, messages: Union[str, list[str], list[int], list[list[int]], None], stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN, - sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, tools: Optional[object] | NotGiven = NOT_GIVEN, tool_choice: str | NotGiven = NOT_GIVEN, + meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncTaskStatus: _cast_type = AsyncTaskStatus + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if temperature is not None and temperature != NOT_GIVEN: + if temperature <= 0: + do_sample = False + temperature = 0.01 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501 + if temperature >= 1: + temperature = 0.99 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间") + if top_p is not None and top_p != NOT_GIVEN: + if top_p >= 1: + top_p = 0.99 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + if top_p <= 0: + top_p = 0.01 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") - if disable_strict_validation: - _cast_type = object + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if isinstance(messages, list): + for item in messages: + if item.get("content"): + item["content"] = drop_prefix_image_data(item["content"]) + + body = { + "model": model, + "request_id": request_id, + "user_id": user_id, + "temperature": temperature, + "top_p": top_p, + "do_sample": do_sample, + "max_tokens": max_tokens, + "seed": seed, + "messages": messages, + "stop": stop, + "sensitive_word_check": sensitive_word_check, + "tools": tools, + "tool_choice": tool_choice, + "meta": meta, + "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra), + } return self._post( "/async/chat/completions", - body={ - "model": model, - "request_id": request_id, - "temperature": temperature, - "top_p": top_p, - "do_sample": do_sample, - "max_tokens": max_tokens, - "seed": seed, - "messages": messages, - "stop": stop, - "sensitive_word_check": sensitive_word_check, - "tools": tools, - "tool_choice": tool_choice, - }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) def retrieve_completion_result( self, id: str, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Union[AsyncCompletion, AsyncTaskStatus]: _cast_type = Union[AsyncCompletion, AsyncTaskStatus] - if disable_strict_validation: - _cast_type = object return self._get( path=f"/async-result/{id}", cast_type=_cast_type, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py index 92362fc50a..b3cc46566c 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py @@ -1,17 +1,18 @@ from typing import TYPE_CHECKING -from ...core._base_api import BaseAPI +from ...core import BaseAPI, cached_property from .async_completions import AsyncCompletions from .completions import Completions if TYPE_CHECKING: - from ..._client import ZhipuAI + pass class Chat(BaseAPI): - completions: Completions + @cached_property + def completions(self) -> Completions: + return Completions(self._client) - def __init__(self, client: "ZhipuAI") -> None: - super().__init__(client) - self.completions = Completions(client) - self.asyncCompletions = AsyncCompletions(client) + @cached_property + def asyncCompletions(self) -> AsyncCompletions: # noqa: N802 + return AsyncCompletions(self._client) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py index ec29f33864..1c23473a03 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py @@ -1,15 +1,28 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING, Literal, Optional, Union import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input -from ...core._sse_client import StreamResponse +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + drop_prefix_image_data, + make_request_options, + maybe_transform, +) from ...types.chat.chat_completion import Completion from ...types.chat.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.code_geex import code_geex_params +from ...types.sensitive_word_check import SensitiveWordCheckRequest + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from ..._client import ZhipuAI @@ -24,6 +37,7 @@ class Completions(BaseAPI): *, model: str, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -32,23 +46,43 @@ class Completions(BaseAPI): seed: int | NotGiven = NOT_GIVEN, messages: Union[str, list[str], list[int], object, None], stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN, - sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, tools: Optional[object] | NotGiven = NOT_GIVEN, tool_choice: str | NotGiven = NOT_GIVEN, + meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | StreamResponse[ChatCompletionChunk]: - _cast_type = Completion - _stream_cls = 
StreamResponse[ChatCompletionChunk] - if disable_strict_validation: - _cast_type = object - _stream_cls = StreamResponse[object] - return self._post( - "/chat/completions", - body={ + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if temperature is not None and temperature != NOT_GIVEN: + if temperature <= 0: + do_sample = False + temperature = 0.01 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501 + if temperature >= 1: + temperature = 0.99 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间") + if top_p is not None and top_p != NOT_GIVEN: + if top_p >= 1: + top_p = 0.99 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + if top_p <= 0: + top_p = 0.01 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if isinstance(messages, list): + for item in messages: + if item.get("content"): + item["content"] = drop_prefix_image_data(item["content"]) + + body = deepcopy_minimal( + { "model": model, "request_id": request_id, + "user_id": user_id, "temperature": temperature, "top_p": top_p, "do_sample": do_sample, @@ -60,11 +94,15 @@ class Completions(BaseAPI): "stream": stream, "tools": tools, "tool_choice": tool_choice, - }, - options=make_user_request_input( - extra_headers=extra_headers, - ), - cast_type=_cast_type, - enable_stream=stream or False, - stream_cls=_stream_cls, + "meta": meta, + "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra), + } + ) + return self._post( + "/chat/completions", + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Completion, + stream=stream or False, + stream_cls=StreamResponse[ChatCompletionChunk], ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py index 2308a20451..4b4baef942 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py @@ -4,9 +4,7 @@ from typing import TYPE_CHECKING, Optional, Union import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, Headers, NotGiven -from ..core._http_client import make_user_request_input +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options from ..types.embeddings import EmbeddingsResponded if TYPE_CHECKING: @@ -22,10 +20,13 @@ class Embeddings(BaseAPI): *, input: Union[str, list[str], list[int], list[list[int]]], model: Union[str], + dimensions: Union[int] | NotGiven = NOT_GIVEN, encoding_format: str | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, + request_id: Optional[str] | NotGiven = NOT_GIVEN, sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, disable_strict_validation: Optional[bool] | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> EmbeddingsResponded: @@ -37,11 +38,13 @@ class Embeddings(BaseAPI): body={ "input": input, "model": model, + "dimensions": dimensions, "encoding_format": encoding_format, "user": user, + "request_id": request_id, "sensitive_word_check": sensitive_word_check, }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + 
options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py index f2ac74bffa..ba9de75b7e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py @@ -1,19 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from collections.abc import Mapping +from typing import TYPE_CHECKING, Literal, cast import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, FileTypes, Headers, NotGiven -from ..core._files import is_file_content -from ..core._http_client import make_user_request_input -from ..types.file_object import FileObject, ListOfFileObject +from ..core import ( + NOT_GIVEN, + BaseAPI, + Body, + FileTypes, + Headers, + NotGiven, + _legacy_binary_response, + _legacy_response, + deepcopy_minimal, + extract_files, + make_request_options, + maybe_transform, +) +from ..types.files import FileDeleted, FileObject, ListOfFileObject, UploadDetail, file_create_params if TYPE_CHECKING: from .._client import ZhipuAI -__all__ = ["Files"] +__all__ = ["Files", "FilesWithRawResponse"] class Files(BaseAPI): @@ -23,30 +34,69 @@ class Files(BaseAPI): def create( self, *, - file: FileTypes, - purpose: str, + file: FileTypes = None, + upload_detail: list[UploadDetail] = None, + purpose: Literal["fine-tune", "retrieval", "batch"], + knowledge_id: str = None, + sentence_size: int = None, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileObject: - if not is_file_content(file): - prefix = f"Expected file input `{file!r}`" - raise RuntimeError( - f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(file)} instead." - ) from None - files = [("file", file)] - - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + if not file and not upload_detail: + raise ValueError("At least one of `file` and `upload_detail` must be provided.") + body = deepcopy_minimal( + { + "file": file, + "upload_detail": upload_detail, + "purpose": purpose, + "knowledge_id": knowledge_id, + "sentence_size": sentence_size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/files", - body={ - "purpose": purpose, - }, + body=maybe_transform(body, file_create_params.FileCreateParams), files=files, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FileObject, ) + # def retrieve( + # self, + # file_id: str, + # *, + # extra_headers: Headers | None = None, + # extra_body: Body | None = None, + # timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + # ) -> FileObject: + # """ + # Returns information about a specific file. 
+ # + # Args: + # file_id: The ID of the file to retrieve information about + # extra_headers: Send extra headers + # + # extra_body: Add additional JSON properties to the request + # + # timeout: Override the client-level default timeout for this request, in seconds + # """ + # if not file_id: + # raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + # return self._get( + # f"/files/{file_id}", + # options=make_request_options( + # extra_headers=extra_headers, extra_body=extra_body, timeout=timeout + # ), + # cast_type=FileObject, + # ) + def list( self, *, @@ -55,13 +105,15 @@ class Files(BaseAPI): after: str | NotGiven = NOT_GIVEN, order: str | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ListOfFileObject: return self._get( "/files", cast_type=ListOfFileObject, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "purpose": purpose, @@ -71,3 +123,72 @@ class Files(BaseAPI): }, ), ) + + def delete( + self, + file_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. + + Args: + file_id: The ID of the file to delete + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._delete( + f"/files/{file_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FileDeleted, + ) + + def content( + self, + file_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/files/{file_id}/content", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=_legacy_binary_response.HttpxBinaryResponseContent, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py index e69de29bb2..7c309b8341 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py @@ -0,0 +1,5 @@ +from .fine_tuning import FineTuning +from .jobs import Jobs +from .models import FineTunedModels + +__all__ = ["Jobs", "FineTunedModels", "FineTuning"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py index dc30bd33ed..8670f7de00 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py @@ -1,15 +1,18 @@ from typing import TYPE_CHECKING -from ...core._base_api import BaseAPI +from ...core import BaseAPI, cached_property from .jobs import Jobs +from .models import FineTunedModels if TYPE_CHECKING: - from ..._client import ZhipuAI + pass class FineTuning(BaseAPI): - jobs: Jobs + @cached_property + def jobs(self) -> Jobs: + return Jobs(self._client) - def __init__(self, client: "ZhipuAI") -> None: - super().__init__(client) - self.jobs = Jobs(client) + @cached_property + def models(self) -> FineTunedModels: + return FineTunedModels(self._client) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..40777a153f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py @@ -0,0 +1,3 @@ +from .jobs import Jobs + +__all__ = ["Jobs"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py similarity index 53% rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py index 3d2e9208a1..8b038cadc0 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py @@ -4,13 +4,23 @@ from typing import TYPE_CHECKING, Optional import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input -from ...types.fine_tuning import FineTuningJob, FineTuningJobEvent, ListOfFineTuningJob, job_create_params +from ....core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + make_request_options, +) +from ....types.fine_tuning import ( + FineTuningJob, + FineTuningJobEvent, + ListOfFineTuningJob, + job_create_params, +) if TYPE_CHECKING: - from ..._client import ZhipuAI + from ...._client import ZhipuAI __all__ = ["Jobs"] @@ -29,6 +39,7 @@ class Jobs(BaseAPI): request_id: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: return self._post( @@ -41,7 +52,7 @@ class Jobs(BaseAPI): "validation_file": validation_file, "request_id": request_id, }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FineTuningJob, ) @@ -50,11 +61,12 @@ class Jobs(BaseAPI): fine_tuning_job_id: str, *, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: return self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}", - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FineTuningJob, ) @@ -64,13 +76,15 @@ class Jobs(BaseAPI): after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ListOfFineTuningJob: return self._get( "/fine_tuning/jobs", cast_type=ListOfFineTuningJob, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "after": after, @@ -79,6 +93,24 @@ class Jobs(BaseAPI): ), ) + def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # noqa: E501 + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTuningJob, + ) + def list_events( self, fine_tuning_job_id: str, @@ -86,13 +118,15 @@ class Jobs(BaseAPI): after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJobEvent: return self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}/events", cast_type=FineTuningJobEvent, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "after": after, @@ -100,3 +134,19 @@ class Jobs(BaseAPI): }, ), ) + + def delete( + self, + fine_tuning_job_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._delete( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTuningJob, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py new file mode 100644 index 0000000000..d832635baf --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py @@ -0,0 +1,3 @@ +from .fine_tuned_models import FineTunedModels + +__all__ = ["FineTunedModels"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py new file mode 100644 index 0000000000..29c023e3b1 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import httpx + +from ....core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + make_request_options, +) +from ....types.fine_tuning.models import FineTunedModelsStatus + +if TYPE_CHECKING: + from ...._client import ZhipuAI + +__all__ = ["FineTunedModels"] + + +class FineTunedModels(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def delete( + self, + fine_tuned_model: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTunedModelsStatus: + if not fine_tuned_model: + raise ValueError(f"Expected a non-empty value for `fine_tuned_model` but received {fine_tuned_model!r}") + return self._delete( + f"fine_tuning/fine_tuned_models/{fine_tuned_model}", + 
options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTunedModelsStatus, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py index 2692b093af..8ad411913f 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py @@ -4,10 +4,9 @@ from typing import TYPE_CHECKING, Optional import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, Body, Headers, NotGiven -from ..core._http_client import make_user_request_input +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options from ..types.image import ImagesResponded +from ..types.sensitive_word_check import SensitiveWordCheckRequest if TYPE_CHECKING: from .._client import ZhipuAI @@ -27,8 +26,10 @@ class Images(BaseAPI): response_format: Optional[str] | NotGiven = NOT_GIVEN, size: Optional[str] | NotGiven = NOT_GIVEN, style: Optional[str] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, extra_body: Body | None = None, disable_strict_validation: Optional[bool] | None = None, @@ -45,12 +46,14 @@ class Images(BaseAPI): "n": n, "quality": quality, "response_format": response_format, + "sensitive_word_check": sensitive_word_check, "size": size, "style": style, "user": user, + "user_id": user_id, "request_id": request_id, }, - options=make_user_request_input(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py new file mode 100644 index 0000000000..5a67d743c3 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py @@ -0,0 +1,3 @@ +from .knowledge import Knowledge + +__all__ = ["Knowledge"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py new file mode 100644 index 0000000000..fd289e2232 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py @@ -0,0 +1,3 @@ +from .document import Document + +__all__ = ["Document"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py new file mode 100644 index 0000000000..2c4066d893 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Literal, Optional, cast + +import httpx + +from ....core import ( 
+ NOT_GIVEN, + BaseAPI, + Body, + FileTypes, + Headers, + NotGiven, + deepcopy_minimal, + extract_files, + make_request_options, + maybe_transform, +) +from ....types.files import UploadDetail, file_create_params +from ....types.knowledge.document import DocumentData, DocumentObject, document_edit_params, document_list_params +from ....types.knowledge.document.document_list_resp import DocumentPage + +if TYPE_CHECKING: + from ...._client import ZhipuAI + +__all__ = ["Document"] + + +class Document(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def create( + self, + *, + file: FileTypes = None, + custom_separator: Optional[list[str]] = None, + upload_detail: list[UploadDetail] = None, + purpose: Literal["retrieval"], + knowledge_id: str = None, + sentence_size: int = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentObject: + if not file and not upload_detail: + raise ValueError("At least one of `file` and `upload_detail` must be provided.") + body = deepcopy_minimal( + { + "file": file, + "upload_detail": upload_detail, + "purpose": purpose, + "custom_separator": custom_separator, + "knowledge_id": knowledge_id, + "sentence_size": sentence_size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/files", + body=maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=DocumentObject, + ) + + def edit( + self, + document_id: str, + knowledge_type: str, + *, + custom_separator: Optional[list[str]] = None, + sentence_size: Optional[int] = None, + callback_url: Optional[str] = None, + callback_header: Optional[dict[str, str]] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + + Args: + document_id: 知识id + knowledge_type: 知识类型: + 1:文章知识: 支持pdf,url,docx + 2.问答知识-文档: 支持pdf,url,docx + 3.问答知识-表格: 支持xlsx + 4.商品库-表格: 支持xlsx + 5.自定义: 支持pdf,url,docx + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + :param knowledge_type: + :param document_id: + :param timeout: + :param extra_body: + :param callback_header: + :param sentence_size: + :param extra_headers: + :param callback_url: + :param custom_separator: + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + body = deepcopy_minimal( + { + "id": document_id, + "knowledge_type": knowledge_type, + "custom_separator": custom_separator, + "sentence_size": sentence_size, + "callback_url": callback_url, + "callback_header": callback_header, + } + ) + + return self._put( + f"/document/{document_id}", + body=maybe_transform(body, document_edit_params.DocumentEditParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def list( + self, + 
knowledge_id: str, + *, + purpose: str | NotGiven = NOT_GIVEN, + page: str | NotGiven = NOT_GIVEN, + limit: str | NotGiven = NOT_GIVEN, + order: Literal["desc", "asc"] | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentPage: + return self._get( + "/files", + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "knowledge_id": knowledge_id, + "purpose": purpose, + "page": page, + "limit": limit, + "order": order, + }, + document_list_params.DocumentListParams, + ), + ), + cast_type=DocumentPage, + ) + + def delete( + self, + document_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + Delete a file. + + Args: + + document_id: 知识id + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + return self._delete( + f"/document/{document_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def retrieve( + self, + document_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentData: + """ + + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + return self._get( + f"/document/{document_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=DocumentData, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py new file mode 100644 index 0000000000..fea4c73ac9 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + cached_property, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.knowledge import KnowledgeInfo, KnowledgeUsed, knowledge_create_params, knowledge_list_params +from ...types.knowledge.knowledge_list_resp import KnowledgePage +from .document import Document + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Knowledge"] + + +class Knowledge(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + @cached_property + def document(self) -> Document: + return Document(self._client) + + def create( + self, + embedding_id: int, + name: str, + *, + customer_identifier: Optional[str] = None, + description: Optional[str] = None, + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = 
None, + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None, + bucket_id: Optional[str] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgeInfo: + body = deepcopy_minimal( + { + "embedding_id": embedding_id, + "name": name, + "customer_identifier": customer_identifier, + "description": description, + "background": background, + "icon": icon, + "bucket_id": bucket_id, + } + ) + return self._post( + "/knowledge", + body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=KnowledgeInfo, + ) + + def modify( + self, + knowledge_id: str, + embedding_id: int, + *, + name: str, + description: Optional[str] = None, + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None, + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + body = deepcopy_minimal( + { + "id": knowledge_id, + "embedding_id": embedding_id, + "name": name, + "description": description, + "background": background, + "icon": icon, + } + ) + return self._put( + f"/knowledge/{knowledge_id}", + body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def query( + self, + *, + page: int | NotGiven = 1, + size: int | NotGiven = 10, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgePage: + return self._get( + "/knowledge", + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "page": page, + "size": size, + }, + knowledge_list_params.KnowledgeListParams, + ), + ), + cast_type=KnowledgePage, + ) + + def delete( + self, + knowledge_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + Delete a file. + + Args: + knowledge_id: 知识库ID + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not knowledge_id: + raise ValueError("Expected a non-empty value for `knowledge_id`") + + return self._delete( + f"/knowledge/{knowledge_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def used( + self, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgeUsed: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/knowledge/capacity", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=KnowledgeUsed, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py new file mode 100644 index 0000000000..43e4e37da1 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py @@ -0,0 +1,3 @@ +from .tools import Tools + +__all__ = ["Tools"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py new file mode 100644 index 0000000000..3c3a630aff --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Literal, Optional, Union + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.tools import WebSearch, WebSearchChunk, tools_web_search_params + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Tools"] + + +class Tools(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def web_search( + self, + *, + model: str, + request_id: Optional[str] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + messages: Union[str, list[str], list[int], object, None], + scope: Optional[str] | NotGiven = NOT_GIVEN, + location: Optional[str] | NotGiven = NOT_GIVEN, + recent_days: Optional[int] | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> WebSearch | StreamResponse[WebSearchChunk]: + body = deepcopy_minimal( + { + "model": model, + "request_id": request_id, + "messages": messages, + "stream": stream, + "scope": scope, + "location": location, + "recent_days": recent_days, + } + ) + return self._post( + "/tools", + body=maybe_transform(body, tools_web_search_params.WebSearchParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=WebSearch, + stream=stream or False, + stream_cls=StreamResponse[WebSearchChunk], + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py new file mode 100644 index 0000000000..6b0f99ed09 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py @@ -0,0 +1,7 @@ +from .videos import ( + Videos, +) + +__all__ = [ + "Videos", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py new file mode 100644 index 0000000000..f1f1c08036 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.sensitive_word_check import SensitiveWordCheckRequest +from ...types.video import VideoObject, video_create_params + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Videos"] + + +class Videos(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def generations( + self, + model: str, + *, + prompt: str = None, + image_url: str = None, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VideoObject: + if not model and not model: + raise ValueError("At least one of `model` and `prompt` must be provided.") + body = deepcopy_minimal( + { + "model": model, + "prompt": prompt, + "image_url": image_url, + "sensitive_word_check": sensitive_word_check, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/videos/generations", + body=maybe_transform(body, video_create_params.VideoCreateParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=VideoObject, + ) + + def retrieve_videos_result( + self, + id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VideoObject: + if not id: + raise ValueError("At least one of `id` must be provided.") + + return self._get( + f"/async-result/{id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=VideoObject, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py index e69de29bb2..3d6466d279 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py @@ -0,0 +1,108 @@ +from ._base_api import BaseAPI +from ._base_compat import ( + PYDANTIC_V2, + ConfigDict, + GenericModel, + cached_property, + field_get_default, + get_args, + get_model_config, + get_model_fields, + get_origin, + is_literal_type, + is_union, + parse_obj, +) +from ._base_models import BaseModel, construct_type +from ._base_type import ( + NOT_GIVEN, + Body, + FileTypes, + Headers, + IncEx, + ModelT, + NotGiven, + Query, +) +from ._constants import ( + ZHIPUAI_DEFAULT_LIMITS, + ZHIPUAI_DEFAULT_MAX_RETRIES, + ZHIPUAI_DEFAULT_TIMEOUT, +) +from ._errors import ( + APIAuthenticationError, + APIConnectionError, + APIInternalError, + APIReachLimitError, + APIRequestFailedError, + APIResponseError, + APIResponseValidationError, + APIServerFlowExceedError, + APIStatusError, + APITimeoutError, + ZhipuAIError, +) +from ._files import is_file_content +from ._http_client import HttpClient, make_request_options +from ._sse_client import StreamResponse +from ._utils import ( + deepcopy_minimal, + drop_prefix_image_data, + extract_files, + is_given, + is_list, + is_mapping, + maybe_transform, + 
parse_date, + parse_datetime, +) + +__all__ = [ + "BaseModel", + "construct_type", + "BaseAPI", + "NOT_GIVEN", + "Headers", + "NotGiven", + "Body", + "IncEx", + "ModelT", + "Query", + "FileTypes", + "PYDANTIC_V2", + "ConfigDict", + "GenericModel", + "get_args", + "is_union", + "parse_obj", + "get_origin", + "is_literal_type", + "get_model_config", + "get_model_fields", + "field_get_default", + "is_file_content", + "ZhipuAIError", + "APIStatusError", + "APIRequestFailedError", + "APIAuthenticationError", + "APIReachLimitError", + "APIInternalError", + "APIServerFlowExceedError", + "APIResponseError", + "APIResponseValidationError", + "APITimeoutError", + "make_request_options", + "HttpClient", + "ZHIPUAI_DEFAULT_TIMEOUT", + "ZHIPUAI_DEFAULT_MAX_RETRIES", + "ZHIPUAI_DEFAULT_LIMITS", + "is_list", + "is_mapping", + "parse_date", + "parse_datetime", + "is_given", + "maybe_transform", + "deepcopy_minimal", + "extract_files", + "StreamResponse", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py index 10b46ff8e3..3592ea6bac 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py @@ -16,3 +16,4 @@ class BaseAPI: self._post = client.post self._put = client.put self._patch = client.patch + self._get_api_list = client.get_api_list diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py new file mode 100644 index 0000000000..92a5d683be --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +from collections.abc import Callable +from datetime import date, datetime +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, overload + +import pydantic +from pydantic.fields import FieldInfo +from typing_extensions import Self + +from ._base_type import StrBytesIntFloat + +_T = TypeVar("_T") +_ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) + +# --------------- Pydantic v2 compatibility --------------- + +# Pyright incorrectly reports some of our functions as overriding a method when they don't +# pyright: reportIncompatibleMethodOverride=false + +PYDANTIC_V2 = pydantic.VERSION.startswith("2.") + +# v1 re-exports +if TYPE_CHECKING: + + def parse_date(value: date | StrBytesIntFloat) -> date: ... + + def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime: ... + + def get_args(t: type[Any]) -> tuple[Any, ...]: ... + + def is_union(tp: type[Any] | None) -> bool: ... + + def get_origin(t: type[Any]) -> type[Any] | None: ... + + def is_literal_type(type_: type[Any]) -> bool: ... + + def is_typeddict(type_: type[Any]) -> bool: ... 
+ +else: + if PYDANTIC_V2: + from pydantic.v1.typing import ( # noqa: I001 + get_args as get_args, # noqa: PLC0414 + is_union as is_union, # noqa: PLC0414 + get_origin as get_origin, # noqa: PLC0414 + is_typeddict as is_typeddict, # noqa: PLC0414 + is_literal_type as is_literal_type, # noqa: PLC0414 + ) + from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414 + else: + from pydantic.typing import ( # noqa: I001 + get_args as get_args, # noqa: PLC0414 + is_union as is_union, # noqa: PLC0414 + get_origin as get_origin, # noqa: PLC0414 + is_typeddict as is_typeddict, # noqa: PLC0414 + is_literal_type as is_literal_type, # noqa: PLC0414 + ) + from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414 + + +# refactored config +if TYPE_CHECKING: + from pydantic import ConfigDict +else: + if PYDANTIC_V2: + from pydantic import ConfigDict + else: + # TODO: provide an error message here? + ConfigDict = None + + +# renamed methods / properties +def parse_obj(model: type[_ModelT], value: object) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(value) + else: + # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + return cast(_ModelT, model.parse_obj(value)) + + +def field_is_required(field: FieldInfo) -> bool: + if PYDANTIC_V2: + return field.is_required() + return field.required # type: ignore + + +def field_get_default(field: FieldInfo) -> Any: + value = field.get_default() + if PYDANTIC_V2: + from pydantic_core import PydanticUndefined + + if value == PydanticUndefined: + return None + return value + return value + + +def field_outer_type(field: FieldInfo) -> Any: + if PYDANTIC_V2: + return field.annotation + return field.outer_type_ # type: ignore + + +def get_model_config(model: type[pydantic.BaseModel]) -> Any: + if PYDANTIC_V2: + return model.model_config + return model.__config__ # type: ignore + + +def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: + if PYDANTIC_V2: + return model.model_fields + return model.__fields__ # type: ignore + + +def model_copy(model: _ModelT) -> _ModelT: + if PYDANTIC_V2: + return model.model_copy() + return model.copy() # type: ignore + + +def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: + if PYDANTIC_V2: + return model.model_dump_json(indent=indent) + return model.json(indent=indent) # type: ignore + + +def model_dump( + model: pydantic.BaseModel, + *, + exclude_unset: bool = False, + exclude_defaults: bool = False, +) -> dict[str, Any]: + if PYDANTIC_V2: + return model.model_dump( + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ) + return cast( + "dict[str, Any]", + model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ), + ) + + +def model_parse(model: type[_ModelT], data: Any) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(data) + return model.parse_obj(data) # pyright: ignore[reportDeprecated] + + +# generic models +if TYPE_CHECKING: + + class GenericModel(pydantic.BaseModel): ... + +else: + if PYDANTIC_V2: + # there no longer needs to be a distinction in v2 but + # we still have to create our own subclass to avoid + # inconsistent MRO ordering errors + class GenericModel(pydantic.BaseModel): ... + + else: + import pydantic.generics + + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ... 
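The compatibility helpers above exist so the rest of the SDK can call a single API regardless of which Pydantic major version is installed. A minimal illustrative sketch of that idea, standalone and using a hypothetical `Usage` model rather than the SDK's own classes:

```python
# Sketch: one call site that works on both Pydantic v1 and v2, mirroring the
# version check and renamed-method shims above. `Usage` is a made-up example model.
import pydantic

PYDANTIC_V2 = pydantic.VERSION.startswith("2.")


class Usage(pydantic.BaseModel):
    prompt_tokens: int
    completion_tokens: int


def parse_obj(model: type[pydantic.BaseModel], value: object) -> pydantic.BaseModel:
    # Pydantic v2 renamed `parse_obj` to `model_validate`; the shim hides that difference.
    return model.model_validate(value) if PYDANTIC_V2 else model.parse_obj(value)


print(parse_obj(Usage, {"prompt_tokens": 10, "completion_tokens": 20}))
```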
+ + +# cached properties +if TYPE_CHECKING: + cached_property = property + + # we define a separate type (copied from typeshed) + # that represents that `cached_property` is `set`able + # at runtime, which differs from `@property`. + # + # this is a separate type as editors likely special case + # `@property` and we don't want to cause issues just to have + # more helpful internal types. + + class typed_cached_property(Generic[_T]): # noqa: N801 + func: Callable[[Any], _T] + attrname: str | None + + def __init__(self, func: Callable[[Any], _T]) -> None: ... + + @overload + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ... + + @overload + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ... + + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: + raise NotImplementedError() + + def __set_name__(self, owner: type[Any], name: str) -> None: ... + + # __set__ is not defined at runtime, but @cached_property is designed to be settable + def __set__(self, instance: object, value: _T) -> None: ... +else: + try: + from functools import cached_property + except ImportError: + from cached_property import cached_property + + typed_cached_property = cached_property diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py new file mode 100644 index 0000000000..5e9a7e0a98 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py @@ -0,0 +1,671 @@ +from __future__ import annotations + +import inspect +import os +from collections.abc import Callable +from datetime import date, datetime +from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, TypeGuard, TypeVar, cast + +import pydantic +import pydantic.generics +from pydantic.fields import FieldInfo +from typing_extensions import ( + ParamSpec, + Protocol, + override, + runtime_checkable, +) + +from ._base_compat import ( + PYDANTIC_V2, + ConfigDict, + field_get_default, + get_args, + get_model_config, + get_model_fields, + get_origin, + is_literal_type, + is_union, + parse_obj, +) +from ._base_compat import ( + GenericModel as BaseGenericModel, +) +from ._base_type import ( + IncEx, + ModelT, +) +from ._utils import ( + PropertyInfo, + coerce_boolean, + extract_type_arg, + is_annotated_type, + is_list, + is_mapping, + parse_date, + parse_datetime, + strip_annotated_type, +) + +if TYPE_CHECKING: + from pydantic_core.core_schema import LiteralSchema, ModelField, ModelFieldsSchema + +__all__ = ["BaseModel", "GenericModel"] +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +_T = TypeVar("_T") +P = ParamSpec("P") + + +@runtime_checkable +class _ConfigProtocol(Protocol): + allow_population_by_field_name: bool + + +class BaseModel(pydantic.BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) + ) + else: + + @property + @override + def model_fields_set(self) -> set[str]: + # a forwards-compat shim for pydantic v2 + return self.__fields_set__ # type: ignore + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + extra: Any = pydantic.Extra.allow # type: ignore + + def to_dict( + self, + *, + mode: Literal["json", "python"] = "python", + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + 
warnings: bool = True, + ) -> dict[str, object]: + """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + mode: + If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`. + If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)` + + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2. + """ # noqa: E501 + return self.model_dump( + mode=mode, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + def to_json( + self, + *, + indent: int | None = 2, + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> str: + """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation). + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2` + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2. + """ # noqa: E501 + return self.model_dump_json( + indent=indent, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + @override + def __str__(self) -> str: + # mypy complains about an invalid self arg + return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + + # Override the 'construct' method in a way that supports recursive parsing without validation. + # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. 
+ @classmethod + @override + def construct( + cls: type[ModelT], + _fields_set: set[str] | None = None, + **values: object, + ) -> ModelT: + m = cls.__new__(cls) + fields_values: dict[str, object] = {} + + config = get_model_config(cls) + populate_by_name = ( + config.allow_population_by_field_name + if isinstance(config, _ConfigProtocol) + else config.get("populate_by_name") + ) + + if _fields_set is None: + _fields_set = set() + + model_fields = get_model_fields(cls) + for name, field in model_fields.items(): + key = field.alias + if key is None or (key not in values and populate_by_name): + key = name + + if key in values: + fields_values[name] = _construct_field(value=values[key], field=field, key=key) + _fields_set.add(name) + else: + fields_values[name] = field_get_default(field) + + _extra = {} + for key, value in values.items(): + if key not in model_fields: + if PYDANTIC_V2: + _extra[key] = value + else: + _fields_set.add(key) + fields_values[key] = value + + object.__setattr__(m, "__dict__", fields_values) # noqa: PLC2801 + + if PYDANTIC_V2: + # these properties are copied from Pydantic's `model_construct()` method + object.__setattr__(m, "__pydantic_private__", None) # noqa: PLC2801 + object.__setattr__(m, "__pydantic_extra__", _extra) # noqa: PLC2801 + object.__setattr__(m, "__pydantic_fields_set__", _fields_set) # noqa: PLC2801 + else: + # init_private_attributes() does not exist in v2 + m._init_private_attributes() # type: ignore + + # copied from Pydantic v1's `construct()` method + object.__setattr__(m, "__fields_set__", _fields_set) # noqa: PLC2801 + + return m + + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + # because the type signatures are technically different + # although not in practice + model_construct = construct + + if not PYDANTIC_V2: + # we define aliases for some of the new pydantic v2 methods so + # that we can just document these methods without having to specify + # a specific pydantic version as some users may not know which + # pydantic version they are currently using + + @override + def model_dump( + self, + *, + mode: Literal["json", "python"] | str = "python", + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, + ) -> dict[str, Any]: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump + + Generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + Args: + mode: The mode in which `to_python` should run. + If mode is 'json', the dictionary will only contain JSON serializable types. + If mode is 'python', the dictionary may contain any Python objects. + include: A list of fields to include in the output. + exclude: A list of fields to exclude from the output. + by_alias: Whether to use the field's alias in the dictionary key if defined. + exclude_unset: Whether to exclude fields that are unset or None from the output. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + round_trip: Whether to enable serialization and deserialization round-trip support. 
+ warnings: Whether to log warnings when invalid fields are encountered. + + Returns: + A dictionary representation of the model. + """ + if mode != "python": + raise ValueError("mode is only supported in Pydantic v2") + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") + return super().dict( # pyright: ignore[reportDeprecated] + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + @override + def model_dump_json( + self, + *, + indent: int | None = None, + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, + ) -> str: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json + + Generates a JSON representation of the model using Pydantic's `to_json` method. + + Args: + indent: Indentation to use in the JSON output. If None is passed, the output will be compact. + include: Field(s) to include in the JSON output. Can take either a string or set of strings. + exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings. + by_alias: Whether to serialize using field aliases. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + round_trip: Whether to use serialization/deserialization between JSON and class instance. + warnings: Whether to show any warnings that occurred during serialization. + + Returns: + A JSON string representation of the model. 
+ """ + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") + return super().json( # type: ignore[reportDeprecated] + indent=indent, + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + +def _construct_field(value: object, field: FieldInfo, key: str) -> object: + if value is None: + return field_get_default(field) + + if PYDANTIC_V2: + type_ = field.annotation + else: + type_ = cast(type, field.outer_type_) # type: ignore + + if type_ is None: + raise RuntimeError(f"Unexpected field type is None for {key}") + + return construct_type(value=value, type_=type_) + + +def is_basemodel(type_: type) -> bool: + """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`""" + if is_union(type_): + return any(is_basemodel(variant) for variant in get_args(type_)) + + return is_basemodel_type(type_) + + +def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: + origin = get_origin(type_) or type_ + return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) + + +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. + + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + +def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: + """Loose coercion to the expected type with construction of nested values. + + Note: the returned value from this function is not guaranteed to match the + given type. + """ + return cast(_T, construct_type(value=value, type_=type_)) + + +def construct_type(*, value: object, type_: type) -> object: + """Loose coercion to the expected type with construction of nested values. + + If the given value does not match the expected type then it is returned as-is. + """ + # we allow `object` as the input type because otherwise, passing things like + # `Literal['value']` will be reported as a type error by type checkers + type_ = cast("type[object]", type_) + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + meta: tuple[Any, ...] = get_args(type_)[1:] + type_ = extract_type_arg(type_, 0) + else: + meta = () + # we need to use the origin class for any types that are subscripted generics + # e.g. Dict[str, object] + origin = get_origin(type_) or type_ + args = get_args(type_) + + if is_union(origin): + try: + return validate_type(type_=cast("type[object]", type_), value=value) + except Exception: + pass + + # if the type is a discriminated union then we want to construct the right variant + # in the union, even if the data doesn't match exactly, otherwise we'd break code + # that relies on the constructed class types, e.g. 
+ # + # class FooType: + # kind: Literal['foo'] + # value: str + # + # class BarType: + # kind: Literal['bar'] + # value: int + # + # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then + # we'd end up constructing `FooType` when it should be `BarType`. + discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta) + if discriminator and is_mapping(value): + variant_value = value.get(discriminator.field_alias_from or discriminator.field_name) + if variant_value and isinstance(variant_value, str): + variant_type = discriminator.mapping.get(variant_value) + if variant_type: + return construct_type(type_=variant_type, value=value) + + # if the data is not valid, use the first variant that doesn't fail while deserializing + for variant in args: + try: + return construct_type(value=value, type_=variant) + except Exception: + continue + + raise RuntimeError(f"Could not convert data into a valid instance of {type_}") + if origin == dict: + if not is_mapping(value): + return value + + _, items_type = get_args(type_) # Dict[_, items_type] + return {key: construct_type(value=item, type_=items_type) for key, item in value.items()} + + if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)): + if is_list(value): + return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value] + + if is_mapping(value): + if issubclass(type_, BaseModel): + return type_.construct(**value) # type: ignore[arg-type] + + return cast(Any, type_).construct(**value) + + if origin == list: + if not is_list(value): + return value + + inner_type = args[0] # List[inner_type] + return [construct_type(value=entry, type_=inner_type) for entry in value] + + if origin == float: + if isinstance(value, int): + coerced = float(value) + if coerced != value: + return value + return coerced + + return value + + if type_ == datetime: + try: + return parse_datetime(value) # type: ignore + except Exception: + return value + + if type_ == date: + try: + return parse_date(value) # type: ignore + except Exception: + return value + + return value + + +@runtime_checkable +class CachedDiscriminatorType(Protocol): + __discriminator__: DiscriminatorDetails + + +class DiscriminatorDetails: + field_name: str + """The name of the discriminator field in the variant class, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] + ``` + + Will result in field_name='type' + """ + + field_alias_from: str | None + """The name of the discriminator field in the API response, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] = Field(alias='type_from_api') + ``` + + Will result in field_alias_from='type_from_api' + """ + + mapping: dict[str, type] + """Mapping of discriminator value to variant type, e.g. 
+ + {'foo': FooVariant, 'bar': BarVariant} + """ + + def __init__( + self, + *, + mapping: dict[str, type], + discriminator_field: str, + discriminator_alias: str | None, + ) -> None: + self.mapping = mapping + self.field_name = discriminator_field + self.field_alias_from = discriminator_alias + + +def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None: + if isinstance(union, CachedDiscriminatorType): + return union.__discriminator__ + + discriminator_field_name: str | None = None + + for annotation in meta_annotations: + if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None: + discriminator_field_name = annotation.discriminator + break + + if not discriminator_field_name: + return None + + mapping: dict[str, type] = {} + discriminator_alias: str | None = None + + for variant in get_args(union): + variant = strip_annotated_type(variant) + if is_basemodel_type(variant): + if PYDANTIC_V2: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] + + if field_schema["type"] == "literal": + for entry in cast("LiteralSchema", field_schema)["expected"]: + if isinstance(entry, str): + mapping[entry] = variant + else: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field_info.alias + + if field_info.annotation and is_literal_type(field_info.annotation): + for entry in get_args(field_info.annotation): + if isinstance(entry, str): + mapping[entry] = variant + + if not mapping: + return None + + details = DiscriminatorDetails( + mapping=mapping, + discriminator_field=discriminator_field_name, + discriminator_alias=discriminator_alias, + ) + cast(CachedDiscriminatorType, union).__discriminator__ = details + return details + + +def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: + schema = model.__pydantic_core_schema__ + if schema["type"] != "model": + return None + + fields_schema = schema["schema"] + if fields_schema["type"] != "model-fields": + return None + + fields_schema = cast("ModelFieldsSchema", fields_schema) + + field = fields_schema["fields"].get(field_name) + if not field: + return None + + return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast] + + +def validate_type(*, type_: type[_T], value: object) -> _T: + """Strict validation that the given value matches the expected type""" + if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): + return cast(_T, parse_obj(type_, value)) + + return cast(_T, _validate_non_model_type(type_=type_, value=value)) + + +# our use of subclasssing here causes weirdness for type checkers, +# so we just pretend that we don't subclass +if TYPE_CHECKING: + GenericModel = BaseModel +else: + + class GenericModel(BaseGenericModel, BaseModel): + pass + + +if PYDANTIC_V2: + from pydantic import TypeAdapter + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + return TypeAdapter(type_).validate_python(value) + +elif not TYPE_CHECKING: + + class TypeAdapter(Generic[_T]): + """Used as a placeholder to easily convert runtime types to a Pydantic format + to provide 
validation. + + For example: + ```py + validated = RootModel[int](__root__="5").__root__ + # validated: 5 + ``` + """ + + def __init__(self, type_: type[_T]): + self.type_ = type_ + + def validate_python(self, value: Any) -> _T: + if not isinstance(value, self.type_): + raise ValueError(f"Invalid type: {value} is not of type {self.type_}") + return value + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + return TypeAdapter(type_).validate_python(value) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py index 7a91f9b796..ea1d3f09dc 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py @@ -1,11 +1,21 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence from os import PathLike -from typing import IO, TYPE_CHECKING, Any, Literal, TypeVar, Union +from typing import ( + IO, + TYPE_CHECKING, + Any, + Literal, + Optional, + TypeAlias, + TypeVar, + Union, +) import pydantic -from typing_extensions import override +from httpx import Response +from typing_extensions import Protocol, TypedDict, override, runtime_checkable Query = Mapping[str, object] Body = object @@ -22,7 +32,7 @@ else: # Sentinel class used until PEP 0661 is accepted -class NotGiven(pydantic.BaseModel): +class NotGiven: """ A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior). @@ -50,7 +60,7 @@ NotGivenOr = Union[_T, NotGiven] NOT_GIVEN = NotGiven() -class Omit(pydantic.BaseModel): +class Omit: """In certain situations you need to be able to represent a case where a default value has to be explicitly removed and `None` is not an appropriate substitute, for example: @@ -71,37 +81,90 @@ class Omit(pydantic.BaseModel): return False +@runtime_checkable +class ModelBuilderProtocol(Protocol): + @classmethod + def build( + cls: type[_T], + *, + response: Response, + data: object, + ) -> _T: ... + + Headers = Mapping[str, Union[str, Omit]] + +class HeadersLikeProtocol(Protocol): + def get(self, __key: str) -> str | None: ... + + +HeadersLike = Union[Headers, HeadersLikeProtocol] + ResponseT = TypeVar( "ResponseT", - bound="Union[str, None, BaseModel, list[Any], Dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol," - " BinaryResponseContent]", + bound="Union[str, None, BaseModel, list[Any], dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol, BinaryResponseContent]", # noqa: E501 ) +StrBytesIntFloat = Union[str, bytes, int, float] + +# Note: copied from Pydantic +# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 +IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" + +PostParser = Callable[[Any], Any] + + +@runtime_checkable +class InheritsGeneric(Protocol): + """Represents a type that has inherited from `Generic` + + The `__orig_bases__` property can be used to determine the resolved + type variable for a given base class. 
+ """ + + __orig_bases__: tuple[_GenericAlias] + + +class _GenericAlias(Protocol): + __origin__: type[object] + + +class HttpxSendArgs(TypedDict, total=False): + auth: httpx.Auth + + # for user input files if TYPE_CHECKING: + Base64FileInput = Union[IO[bytes], PathLike[str]] FileContent = Union[IO[bytes], bytes, PathLike[str]] else: + Base64FileInput = Union[IO[bytes], PathLike] FileContent = Union[IO[bytes], bytes, PathLike] FileTypes = Union[ - FileContent, # file content - tuple[str, FileContent], # (filename, file) - tuple[str, FileContent, str], # (filename, file , content_type) - tuple[str, FileContent, str, Mapping[str, str]], # (filename, file , content_type, headers) + # file (or bytes) + FileContent, + # (filename, file (or bytes)) + tuple[Optional[str], FileContent], + # (filename, file (or bytes), content_type) + tuple[Optional[str], FileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]], ] - RequestFiles = Union[Mapping[str, FileTypes], Sequence[tuple[str, FileTypes]]] -# for httpx client supported files - +# duplicate of the above but without our custom file support HttpxFileContent = Union[bytes, IO[bytes]] HttpxFileTypes = Union[ - FileContent, # file content - tuple[str, HttpxFileContent], # (filename, file) - tuple[str, HttpxFileContent, str], # (filename, file , content_type) - tuple[str, HttpxFileContent, str, Mapping[str, str]], # (filename, file , content_type, headers) + # file (or bytes) + HttpxFileContent, + # (filename, file (or bytes)) + tuple[Optional[str], HttpxFileContent], + # (filename, file (or bytes), content_type) + tuple[Optional[str], HttpxFileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]], ] HttpxRequestFiles = Union[Mapping[str, HttpxFileTypes], Sequence[tuple[str, HttpxFileTypes]]] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py new file mode 100644 index 0000000000..8e43bdebec --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py @@ -0,0 +1,12 @@ +import httpx + +RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" +# 通过 `Timeout` 控制接口`connect` 和 `read` 超时时间,默认为`timeout=300.0, connect=8.0` +ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0) +# 通过 `retry` 参数控制重试次数,默认为3次 +ZHIPUAI_DEFAULT_MAX_RETRIES = 3 +# 通过 `Limits` 控制最大连接数和保持连接数,默认为`max_connections=50, max_keepalive_connections=10` +ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=50, max_keepalive_connections=10) + +INITIAL_RETRY_DELAY = 0.5 +MAX_RETRY_DELAY = 8.0 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py index 1027c1bc5b..e2c9d24c6c 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py @@ -13,6 +13,7 @@ __all__ = [ "APIResponseError", "APIResponseValidationError", "APITimeoutError", + "APIConnectionError", ] @@ -24,7 +25,7 @@ class ZhipuAIError(Exception): super().__init__(message) -class APIStatusError(Exception): +class APIStatusError(ZhipuAIError): response: httpx.Response status_code: int @@ -49,7 +50,7 @@ class APIInternalError(APIStatusError): ... 
class APIServerFlowExceedError(APIStatusError): ... -class APIResponseError(Exception): +class APIResponseError(ZhipuAIError): message: str request: httpx.Request json_data: object @@ -75,9 +76,11 @@ class APIResponseValidationError(APIResponseError): self.status_code = response.status_code -class APITimeoutError(Exception): - request: httpx.Request +class APIConnectionError(APIResponseError): + def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None: + super().__init__(message, request, json_data=None) - def __init__(self, request: httpx.Request): - self.request = request - super().__init__("Request Timeout") + +class APITimeoutError(APIConnectionError): + def __init__(self, request: httpx.Request) -> None: + super().__init__(message="Request timed out.", request=request) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py index 0796bfe11c..f9d2e14d9e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py @@ -2,40 +2,74 @@ from __future__ import annotations import io import os -from collections.abc import Mapping, Sequence -from pathlib import Path +import pathlib +from typing import TypeGuard, overload -from ._base_type import FileTypes, HttpxFileTypes, HttpxRequestFiles, RequestFiles +from ._base_type import ( + Base64FileInput, + FileContent, + FileTypes, + HttpxFileContent, + HttpxFileTypes, + HttpxRequestFiles, + RequestFiles, +) +from ._utils import is_mapping_t, is_sequence_t, is_tuple_t -def is_file_content(obj: object) -> bool: +def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: + return isinstance(obj, io.IOBase | os.PathLike) + + +def is_file_content(obj: object) -> TypeGuard[FileContent]: return isinstance(obj, bytes | tuple | io.IOBase | os.PathLike) +def assert_is_file_content(obj: object, *, key: str | None = None) -> None: + if not is_file_content(obj): + prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`" + raise RuntimeError( + f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/openai/openai-python/tree/main#file-uploads" + ) from None + + +@overload +def to_httpx_files(files: None) -> None: ... + + +@overload +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... 
+ + +def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: + if files is None: + return None + + if is_mapping_t(files): + files = {key: _transform_file(file) for key, file in files.items()} + elif is_sequence_t(files): + files = [(key, _transform_file(file)) for key, file in files] + else: + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") + + return files + + def _transform_file(file: FileTypes) -> HttpxFileTypes: if is_file_content(file): if isinstance(file, os.PathLike): - path = Path(file) - return path.name, path.read_bytes() - else: - return file - if isinstance(file, tuple): - if isinstance(file[1], os.PathLike): - return (file[0], Path(file[1]).read_bytes(), *file[2:]) - else: - return (file[0], file[1], *file[2:]) - else: - raise TypeError(f"Unexpected input file with type {type(file)},Expected FileContent type or tuple type") + path = pathlib.Path(file) + return (path.name, path.read_bytes()) + + return file + + if is_tuple_t(file): + return (file[0], _read_file_content(file[1]), *file[2:]) + + raise TypeError("Expected file types input to be a FileContent type or to be a tuple") -def make_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: - if files is None: - return None - - if isinstance(files, Mapping): - files = {key: _transform_file(file) for key, file in files.items()} - elif isinstance(files, Sequence): - files = [(key, _transform_file(file)) for key, file in files] - else: - raise TypeError(f"Unexpected input file with type {type(files)}, excepted Mapping or Sequence") - return files +def _read_file_content(file: FileContent) -> HttpxFileContent: + if isinstance(file, os.PathLike): + return pathlib.Path(file).read_bytes() + return file diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py index 5f7f6d04f2..d0f933d814 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py @@ -1,23 +1,70 @@ from __future__ import annotations import inspect -from collections.abc import Mapping -from typing import Any, Union, cast +import logging +import time +import warnings +from collections.abc import Iterator, Mapping +from itertools import starmap +from random import random +from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, TypeVar, Union, cast, overload import httpx import pydantic from httpx import URL, Timeout -from tenacity import retry -from tenacity.stop import stop_after_attempt -from . import _errors -from ._base_type import NOT_GIVEN, AnyMapping, Body, Data, Headers, NotGiven, Query, RequestFiles, ResponseT -from ._errors import APIResponseValidationError, APIStatusError, APITimeoutError -from ._files import make_httpx_files -from ._request_opt import ClientRequestParam, UserRequestInput -from ._response import HttpResponse +from . 
import _errors, get_origin +from ._base_compat import model_copy +from ._base_models import GenericModel, construct_type, validate_type +from ._base_type import ( + NOT_GIVEN, + AnyMapping, + Body, + Data, + Headers, + HttpxSendArgs, + ModelBuilderProtocol, + NotGiven, + Omit, + PostParser, + Query, + RequestFiles, + ResponseT, +) +from ._constants import ( + INITIAL_RETRY_DELAY, + MAX_RETRY_DELAY, + RAW_RESPONSE_HEADER, + ZHIPUAI_DEFAULT_LIMITS, + ZHIPUAI_DEFAULT_MAX_RETRIES, + ZHIPUAI_DEFAULT_TIMEOUT, +) +from ._errors import APIConnectionError, APIResponseValidationError, APIStatusError, APITimeoutError +from ._files import to_httpx_files +from ._legacy_response import LegacyAPIResponse +from ._request_opt import FinalRequestOptions, UserRequestInput +from ._response import APIResponse, BaseAPIResponse, extract_response_type from ._sse_client import StreamResponse -from ._utils import flatten +from ._utils import flatten, is_given, is_mapping + +log: logging.Logger = logging.getLogger(__name__) + +# TODO: make base page type vars covariant +SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]") +# AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]") + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) + +if TYPE_CHECKING: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT +else: + try: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + except ImportError: + # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366 + HTTPX_DEFAULT_TIMEOUT = Timeout(5.0) + headers = { "Accept": "application/json", @@ -25,50 +72,180 @@ headers = { } -def _merge_map(map1: Mapping, map2: Mapping) -> Mapping: - merged = {**map1, **map2} - return {key: val for key, val in merged.items() if val is not None} +class PageInfo: + """Stores the necessary information to build the request to retrieve the next page. + + Either `url` or `params` must be set. + """ + + url: URL | NotGiven + params: Query | NotGiven + + @overload + def __init__( + self, + *, + url: URL, + ) -> None: ... + + @overload + def __init__( + self, + *, + params: Query, + ) -> None: ... + + def __init__( + self, + *, + url: URL | NotGiven = NOT_GIVEN, + params: Query | NotGiven = NOT_GIVEN, + ) -> None: + self.url = url + self.params = params -from itertools import starmap +class BasePage(GenericModel, Generic[_T]): + """ + Defines the core interface for pagination. -from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + Type Args: + ModelT: The pydantic model that represents an item in the response. -ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0) -ZHIPUAI_DEFAULT_MAX_RETRIES = 3 -ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=5, max_keepalive_connections=5) + Methods: + has_next_page(): Check if there is another page available + next_page_info(): Get the necessary information to make a request for the next page + """ + + _options: FinalRequestOptions = pydantic.PrivateAttr() + _model: type[_T] = pydantic.PrivateAttr() + + def has_next_page(self) -> bool: + items = self._get_page_items() + if not items: + return False + return self.next_page_info() is not None + + def next_page_info(self) -> Optional[PageInfo]: ... + + def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] + ... + + def _params_from_url(self, url: URL) -> httpx.QueryParams: + # TODO: do we have to preprocess params here? 
+ return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params) + + def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: + options = model_copy(self._options) + options._strip_raw_response_header() + + if not isinstance(info.params, NotGiven): + options.params = {**options.params, **info.params} + return options + + if not isinstance(info.url, NotGiven): + params = self._params_from_url(info.url) + url = info.url.copy_with(params=params) + options.params = dict(url.params) + options.url = str(url) + return options + + raise ValueError("Unexpected PageInfo state") + + +class BaseSyncPage(BasePage[_T], Generic[_T]): + _client: HttpClient = pydantic.PrivateAttr() + + def _set_private_attributes( + self, + client: HttpClient, + model: type[_T], + options: FinalRequestOptions, + ) -> None: + self._model = model + self._client = client + self._options = options + + # Pydantic uses a custom `__iter__` method to support casting BaseModels + # to dictionaries. e.g. dict(model). + # As we want to support `for item in page`, this is inherently incompatible + # with the default pydantic behaviour. It is not possible to support both + # use cases at once. Fortunately, this is not a big deal as all other pydantic + # methods should continue to work as expected as there is an alternative method + # to cast a model to a dictionary, model.dict(), which is used internally + # by pydantic. + def __iter__(self) -> Iterator[_T]: # type: ignore + for page in self.iter_pages(): + yield from page._get_page_items() + + def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]: + page = self + while True: + yield page + if page.has_next_page(): + page = page.get_next_page() + else: + return + + def get_next_page(self: SyncPageT) -> SyncPageT: + info = self.next_page_info() + if not info: + raise RuntimeError( + "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`." + ) + + options = self._info_to_options(info) + return self._client._request_api_list(self._model, page=self.__class__, options=options) class HttpClient: _client: httpx.Client _version: str _base_url: URL - + max_retries: int timeout: Union[float, Timeout, None] _limits: httpx.Limits _has_custom_http_client: bool _default_stream_cls: type[StreamResponse[Any]] | None = None + _strict_response_validation: bool + def __init__( self, *, version: str, base_url: URL, + _strict_response_validation: bool, + max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES, timeout: Union[float, Timeout, None], + limits: httpx.Limits | None = None, custom_httpx_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, ) -> None: - if timeout is None or isinstance(timeout, NotGiven): + if limits is not None: + warnings.warn( + "The `connection_pool_limits` argument is deprecated. 
The `http_client` argument should be passed instead", # noqa: E501 + category=DeprecationWarning, + stacklevel=3, + ) + if custom_httpx_client is not None: + raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") + else: + limits = ZHIPUAI_DEFAULT_LIMITS + + if not is_given(timeout): if custom_httpx_client and custom_httpx_client.timeout != HTTPX_DEFAULT_TIMEOUT: timeout = custom_httpx_client.timeout else: timeout = ZHIPUAI_DEFAULT_TIMEOUT - self.timeout = cast(Timeout, timeout) + self.max_retries = max_retries + self.timeout = timeout + self._limits = limits self._has_custom_http_client = bool(custom_httpx_client) self._client = custom_httpx_client or httpx.Client( base_url=base_url, timeout=self.timeout, - limits=ZHIPUAI_DEFAULT_LIMITS, + limits=limits, ) self._version = version url = URL(url=base_url) @@ -76,6 +253,7 @@ class HttpClient: url = url.copy_with(raw_path=url.raw_path + b"/") self._base_url = url self._custom_headers = custom_headers or {} + self._strict_response_validation = _strict_response_validation def _prepare_url(self, url: str) -> URL: sub_url = URL(url) @@ -93,55 +271,101 @@ class HttpClient: "ZhipuAI-SDK-Ver": self._version, "source_type": "zhipu-sdk-python", "x-request-sdk": "zhipu-sdk-python", - **self._auth_headers, + **self.auth_headers, **self._custom_headers, } @property - def _auth_headers(self): + def custom_auth(self) -> httpx.Auth | None: + return None + + @property + def auth_headers(self): return {} - def _prepare_headers(self, request_param: ClientRequestParam) -> httpx.Headers: - custom_headers = request_param.headers or {} - headers_dict = _merge_map(self._default_headers, custom_headers) + def _prepare_headers(self, options: FinalRequestOptions) -> httpx.Headers: + custom_headers = options.headers or {} + headers_dict = _merge_mappings(self._default_headers, custom_headers) httpx_headers = httpx.Headers(headers_dict) return httpx_headers - def _prepare_request(self, request_param: ClientRequestParam) -> httpx.Request: + def _remaining_retries( + self, + remaining_retries: Optional[int], + options: FinalRequestOptions, + ) -> int: + return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries) + + def _calculate_retry_timeout( + self, + remaining_retries: int, + options: FinalRequestOptions, + response_headers: Optional[httpx.Headers] = None, + ) -> float: + max_retries = options.get_max_retries(self.max_retries) + + # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says. + # retry_after = self._parse_retry_after_header(response_headers) + # if retry_after is not None and 0 < retry_after <= 60: + # return retry_after + + nb_retries = max_retries - remaining_retries + + # Apply exponential backoff, but not more than the max. + sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) + + # Apply some jitter, plus-or-minus half a second. 
+ jitter = 1 - 0.25 * random() + timeout = sleep_seconds * jitter + return max(timeout, 0) + + def _build_request(self, options: FinalRequestOptions) -> httpx.Request: kwargs: dict[str, Any] = {} - json_data = request_param.json_data - headers = self._prepare_headers(request_param) - url = self._prepare_url(request_param.url) - json_data = request_param.json_data + headers = self._prepare_headers(options) + url = self._prepare_url(options.url) + json_data = options.json_data + if options.extra_json is not None: + if json_data is None: + json_data = cast(Body, options.extra_json) + elif is_mapping(json_data): + json_data = _merge_mappings(json_data, options.extra_json) + else: + raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") + + content_type = headers.get("Content-Type") + # multipart/form-data; boundary=---abc-- if headers.get("Content-Type") == "multipart/form-data": - headers.pop("Content-Type") + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") if json_data: kwargs["data"] = self._make_multipartform(json_data) return self._client.build_request( headers=headers, - timeout=self.timeout if isinstance(request_param.timeout, NotGiven) else request_param.timeout, - method=request_param.method, + timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout, + method=options.method, url=url, json=json_data, - files=request_param.files, - params=request_param.params, + files=options.files, + params=options.params, **kwargs, ) - def _object_to_formdata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]: + def _object_to_formfata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]: items = [] if isinstance(value, Mapping): for k, v in value.items(): - items.extend(self._object_to_formdata(f"{key}[{k}]", v)) + items.extend(self._object_to_formfata(f"{key}[{k}]", v)) return items if isinstance(value, list | tuple): for v in value: - items.extend(self._object_to_formdata(key + "[]", v)) + items.extend(self._object_to_formfata(key + "[]", v)) return items def _primitive_value_to_str(val) -> str: @@ -161,7 +385,7 @@ class HttpClient: return [(key, str_data)] def _make_multipartform(self, data: Mapping[object, object]) -> dict[str, object]: - items = flatten(list(starmap(self._object_to_formdata, data.items()))) + items = flatten(list(starmap(self._object_to_formfata, data.items()))) serialized: dict[str, object] = {} for key, value in items: @@ -170,20 +394,6 @@ class HttpClient: serialized[key] = value return serialized - def _parse_response( - self, - *, - cast_type: type[ResponseT], - response: httpx.Response, - enable_stream: bool, - request_param: ClientRequestParam, - stream_cls: type[StreamResponse[Any]] | None = None, - ) -> HttpResponse: - http_response = HttpResponse( - raw_response=response, cast_type=cast_type, client=self, enable_stream=enable_stream, stream_cls=stream_cls - ) - return http_response.parse() - def _process_response_data( self, *, @@ -194,14 +404,58 @@ class HttpClient: if data is None: return cast(ResponseT, None) - try: - if inspect.isclass(cast_type) and issubclass(cast_type, pydantic.BaseModel): - return cast(ResponseT, cast_type.validate(data)) + if cast_type is object: + return cast(ResponseT, data) - return cast(ResponseT, pydantic.TypeAdapter(cast_type).validate_python(data)) + try: 
+ if inspect.isclass(cast_type) and issubclass(cast_type, ModelBuilderProtocol): + return cast(ResponseT, cast_type.build(response=response, data=data)) + + if self._strict_response_validation: + return cast(ResponseT, validate_type(type_=cast_type, value=data)) + + return cast(ResponseT, construct_type(type_=cast_type, value=data)) except pydantic.ValidationError as err: raise APIResponseValidationError(response=response, json_data=data) from err + def _should_stream_response_body(self, request: httpx.Request) -> bool: + return request.headers.get(RAW_RESPONSE_HEADER) == "stream" # type: ignore[no-any-return] + + def _should_retry(self, response: httpx.Response) -> bool: + # Note: this is not a standard header + should_retry_header = response.headers.get("x-should-retry") + + # If the server explicitly says whether or not to retry, obey. + if should_retry_header == "true": + log.debug("Retrying as header `x-should-retry` is set to `true`") + return True + if should_retry_header == "false": + log.debug("Not retrying as header `x-should-retry` is set to `false`") + return False + + # Retry on request timeouts. + if response.status_code == 408: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on lock timeouts. + if response.status_code == 409: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on rate limits. + if response.status_code == 429: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry internal errors. + if response.status_code >= 500: + log.debug("Retrying due to status code %i", response.status_code) + return True + + log.debug("Not retrying") + return False + def is_closed(self) -> bool: return self._client.is_closed @@ -214,117 +468,385 @@ class HttpClient: def __exit__(self, exc_type, exc_val, exc_tb): self.close() - @retry(stop=stop_after_attempt(ZHIPUAI_DEFAULT_MAX_RETRIES)) def request( + self, + cast_type: type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: bool = False, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: + return self._request( + cast_type=cast_type, + options=options, + stream=stream, + stream_cls=stream_cls, + remaining_retries=remaining_retries, + ) + + def _request( self, *, cast_type: type[ResponseT], - params: ClientRequestParam, - enable_stream: bool = False, - stream_cls: type[StreamResponse[Any]] | None = None, + options: FinalRequestOptions, + remaining_retries: int | None, + stream: bool, + stream_cls: type[StreamResponse] | None, ) -> ResponseT | StreamResponse: - request = self._prepare_request(params) + retries = self._remaining_retries(remaining_retries, options) + request = self._build_request(options) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth try: response = self._client.send( request, - stream=enable_stream, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) - response.raise_for_status() except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_type, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising timeout error") raise APITimeoutError(request=request) from err - except httpx.HTTPStatusError as err: - err.response.read() - # raise err + except Exception as err: + 
log.debug("Encountered Exception", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_type, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + ) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if retries > 0 and self._should_retry(err.response): + err.response.close() + return self._retry_request( + options, + cast_type, + retries, + err.response.headers, + stream=stream, + stream_cls=stream_cls, + ) + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() + + log.debug("Re-raising status error") raise self._make_status_error(err.response) from None - except Exception as err: - raise err - - return self._parse_response( + # return self._parse_response( + # cast_type=cast_type, + # options=options, + # response=response, + # stream=stream, + # stream_cls=stream_cls, + # ) + return self._process_response( cast_type=cast_type, - request_param=params, + options=options, response=response, - enable_stream=enable_stream, + stream=stream, stream_cls=stream_cls, ) + def _retry_request( + self, + options: FinalRequestOptions, + cast_type: type[ResponseT], + remaining_retries: int, + response_headers: httpx.Headers | None, + *, + stream: bool, + stream_cls: type[StreamResponse] | None, + ) -> ResponseT | StreamResponse: + remaining = remaining_retries - 1 + if remaining == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining) + + timeout = self._calculate_retry_timeout(remaining, options, response_headers) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a + # different thread if necessary. 
+ time.sleep(timeout) + + return self._request( + options=options, + cast_type=cast_type, + remaining_retries=remaining, + stream=stream, + stream_cls=stream_cls, + ) + + def _process_response( + self, + *, + cast_type: type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[StreamResponse] | None, + ) -> ResponseT: + # _legacy_response with raw_response_header to paser method + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_type=cast_type, + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + origin = get_origin(cast_type) or cast_type + + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, APIResponse): + raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}") + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_type) + return cast( + ResponseT, + response_cls( + raw=response, + client=self, + cast_type=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + if cast_type == httpx.Response: + return cast(ResponseT, response) + + api_response = APIResponse( + raw=response, + client=self, + cast_type=cast("type[ResponseT]", cast_type), # pyright: ignore[reportUnnecessaryCast] + stream=stream, + stream_cls=stream_cls, + options=options, + ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return api_response.parse() + + def _request_api_list( + self, + model: type[object], + page: type[SyncPageT], + options: FinalRequestOptions, + ) -> SyncPageT: + def _parser(resp: SyncPageT) -> SyncPageT: + resp._set_private_attributes( + client=self, + model=model, + options=options, + ) + return resp + + options.post_parser = _parser + + return self.request(page, options, stream=False) + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: Literal[False] = False, + ) -> ResponseT: ... + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: Literal[True], + stream_cls: type[StreamResponse], + ) -> StreamResponse: ... + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: bool, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: ... + def get( self, path: str, *, cast_type: type[ResponseT], options: UserRequestInput = {}, - enable_stream: bool = False, - ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct(method="get", url=path, **options) - return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream) + stream: bool = False, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT: + opts = FinalRequestOptions.construct(method="get", url=path, **options) + return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls)) + + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: Literal[False] = False, + ) -> ResponseT: ... 
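The `get`/`post` overloads in this hunk follow the standard `typing.overload` plus `Literal` pattern: with `stream=False` the call is typed as `ResponseT`, with `stream=True` it is typed as `StreamResponse`. A self-contained sketch of the same trick, using hypothetical names (`fetch`, `FakeStream`) that are not part of the SDK:

```python
from __future__ import annotations

from typing import Literal, TypeVar, Union, overload

T = TypeVar("T")


class FakeStream:
    """Stand-in for StreamResponse, used only for illustration."""


@overload
def fetch(cast_type: type[T], *, stream: Literal[False] = False) -> T: ...
@overload
def fetch(cast_type: type[T], *, stream: Literal[True]) -> FakeStream: ...
def fetch(cast_type: type[T], *, stream: bool = False) -> Union[T, FakeStream]:
    # A type checker infers fetch(dict) -> dict and fetch(dict, stream=True) -> FakeStream,
    # while a single runtime implementation handles both cases.
    return FakeStream() if stream else cast_type()


print(type(fetch(dict)))               # <class 'dict'>
print(type(fetch(dict, stream=True)))  # a FakeStream instance
```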
+ + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: Literal[True], + stream_cls: type[StreamResponse], + ) -> StreamResponse: ... + + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: bool, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: ... def post( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, files: RequestFiles | None = None, - enable_stream: bool = False, + stream: bool = False, stream_cls: type[StreamResponse[Any]] | None = None, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct( - method="post", json_data=body, files=make_httpx_files(files), url=path, **options + opts = FinalRequestOptions.construct( + method="post", url=path, json_data=body, files=to_httpx_files(files), **options ) - return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream, stream_cls=stream_cls) + return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls)) def patch( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, ) -> ResponseT: - opts = ClientRequestParam.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) def put( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, files: RequestFiles | None = None, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct( - method="put", url=path, json_data=body, files=make_httpx_files(files), **options + opts = FinalRequestOptions.construct( + method="put", url=path, json_data=body, files=to_httpx_files(files), **options ) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) def delete( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct(method="delete", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) + def get_api_list( + self, + path: str, + *, + model: type[object], + page: type[SyncPageT], + body: Body | None = None, + options: UserRequestInput = {}, + method: str = "get", + ) -> SyncPageT: + opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options) + return self._request_api_list(model, page, opts) + def _make_status_error(self, response) -> APIStatusError: response_text = response.text.strip() status_code = response.status_code @@ -343,24 +865,46 @@ class HttpClient: return APIStatusError(message=error_msg, response=response) -def make_user_request_input( - max_retries: int | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - extra_headers: Headers = None, - extra_body: Body | None = None, +def make_request_options( + *, query: Query | None = None, + 
extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + post_parser: PostParser | NotGiven = NOT_GIVEN, ) -> UserRequestInput: + """Create a dict of type RequestOptions without keys of NotGiven values.""" options: UserRequestInput = {} - if extra_headers is not None: options["headers"] = extra_headers - if max_retries is not None: - options["max_retries"] = max_retries - if not isinstance(timeout, NotGiven): - options["timeout"] = timeout - if query is not None: - options["params"] = query + if extra_body is not None: options["extra_json"] = cast(AnyMapping, extra_body) + if query is not None: + options["params"] = query + + if extra_query is not None: + options["params"] = {**options.get("params", {}), **extra_query} + + if not isinstance(timeout, NotGiven): + options["timeout"] = timeout + + if is_given(post_parser): + # internal + options["post_parser"] = post_parser # type: ignore + return options + + +def _merge_mappings( + obj1: Mapping[_T_co, Union[_T, Omit]], + obj2: Mapping[_T_co, Union[_T, Omit]], +) -> dict[_T_co, _T]: + """Merge two mappings of the same type, removing any values that are instances of `Omit`. + + In cases with duplicate keys the second mapping takes precedence. + """ + merged = {**obj1, **obj2} + return {key: value for key, value in merged.items() if not isinstance(value, Omit)} diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py index b0a91d04a9..21f158a5f4 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py @@ -3,9 +3,11 @@ import time import cachetools.func import jwt -API_TOKEN_TTL_SECONDS = 3 * 60 +# 缓存时间 3分钟 +CACHE_TTL_SECONDS = 3 * 60 -CACHE_TTL_SECONDS = API_TOKEN_TTL_SECONDS - 30 +# token 有效期比缓存时间 多30秒 +API_TOKEN_TTL_SECONDS = CACHE_TTL_SECONDS + 30 @cachetools.func.ttl_cache(maxsize=10, ttl=CACHE_TTL_SECONDS) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py new file mode 100644 index 0000000000..51623bd860 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +import os +from collections.abc import AsyncIterator, Iterator +from typing import Any + +import httpx + + +class HttpxResponseContent: + @property + def content(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def text(self) -> str: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def encoding(self) -> str | None: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def charset_encoding(self) -> str | None: + raise NotImplementedError("This method is not implemented for this class.") + + def json(self, **kwargs: Any) -> Any: + raise NotImplementedError("This method is not implemented for this class.") + + def read(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + raise NotImplementedError("This method is not implemented 
for this class.") + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_lines(self) -> Iterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + def close(self) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + async def aread(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_lines(self) -> AsyncIterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + async def aclose(self) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + +class HttpxBinaryResponseContent(HttpxResponseContent): + response: httpx.Response + + def __init__(self, response: httpx.Response) -> None: + self.response = response + + @property + def content(self) -> bytes: + return self.response.content + + @property + def encoding(self) -> str | None: + return self.response.encoding + + @property + def charset_encoding(self) -> str | None: + return self.response.charset_encoding + + def read(self) -> bytes: + return self.response.read() + + def text(self) -> str: + raise NotImplementedError("Not implemented for binary response content") + + def json(self, **kwargs: Any) -> Any: + raise NotImplementedError("Not implemented for binary response content") + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + def iter_lines(self) -> Iterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + async def aiter_lines(self) -> AsyncIterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_bytes(chunk_size) + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_raw(chunk_size) + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to 
the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')` + """ + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(): + f.write(data) + + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(chunk_size): + f.write(data) + + def close(self) -> None: + return self.response.close() + + async def aread(self) -> bytes: + return await self.response.aread() + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_bytes(chunk_size) + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_raw(chunk_size) + + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.response.aiter_bytes(chunk_size): + await f.write(data) + + async def aclose(self) -> None: + return await self.response.aclose() + + +class HttpxTextBinaryResponseContent(HttpxBinaryResponseContent): + response: httpx.Response + + @property + def text(self) -> str: + return self.response.text + + def json(self, **kwargs: Any) -> Any: + return self.response.json(**kwargs) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + return self.response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + return self.response.iter_lines() + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + return self.response.aiter_text(chunk_size) + + async def aiter_lines(self) -> AsyncIterator[str]: + return self.response.aiter_lines() diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py new file mode 100644 index 0000000000..47183b9eee --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +import datetime +import functools +import inspect +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload + +import httpx +import pydantic +from typing_extensions import ParamSpec, override + +from ._base_models import BaseModel, is_basemodel +from ._base_type import NoneType +from ._constants import RAW_RESPONSE_HEADER +from ._errors import APIResponseValidationError +from ._legacy_binary_response import HttpxResponseContent, HttpxTextBinaryResponseContent +from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type +from ._utils import extract_type_arg, is_annotated_type, is_given + +if TYPE_CHECKING: + from ._http_client import HttpClient + from ._request_opt import FinalRequestOptions + +P = ParamSpec("P") +R = TypeVar("R") +_T = TypeVar("_T") + +log: logging.Logger = logging.getLogger(__name__) + + +class LegacyAPIResponse(Generic[R]): + """This is a legacy class as it will be replaced by `APIResponse` + and `AsyncAPIResponse` in the `_response.py` file in the next 
major + release. + + For the sync client this will mostly be the same with the exception + of `content` & `text` will be methods instead of properties. In the + async client, all methods will be async. + + A migration script will be provided & the migration in general should + be smooth. + """ + + _cast_type: type[R] + _client: HttpClient + _parsed_by_type: dict[type[Any], Any] + _stream: bool + _stream_cls: type[StreamResponse[Any]] | None + _options: FinalRequestOptions + + http_response: httpx.Response + + def __init__( + self, + *, + raw: httpx.Response, + cast_type: type[R], + client: HttpClient, + stream: bool, + stream_cls: type[StreamResponse[Any]] | None, + options: FinalRequestOptions, + ) -> None: + self._cast_type = cast_type + self._client = client + self._parsed_by_type = {} + self._stream = stream + self._stream_cls = stream_cls + self._options = options + self.http_response = raw + + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... + + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + NOTE: For the async client: this will become a coroutine in the next major version. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from zhipuai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_type + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + @property + def headers(self) -> httpx.Headers: + return self.http_response.headers + + @property + def http_request(self) -> httpx.Request: + return self.http_response.request + + @property + def status_code(self) -> int: + return self.http_response.status_code + + @property + def url(self) -> httpx.URL: + return self.http_response.url + + @property + def method(self) -> str: + return self.http_request.method + + @property + def content(self) -> bytes: + """Return the binary response content. + + NOTE: this will be removed in favour of `.read()` in the + next major version. + """ + return self.http_response.content + + @property + def text(self) -> str: + """Return the decoded response content. + + NOTE: this will be turned into a method in the next major version. 
+ """ + return self.http_response.text + + @property + def http_version(self) -> str: + return self.http_response.http_version + + @property + def is_closed(self) -> bool: + return self.http_response.is_closed + + @property + def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" + return self.http_response.elapsed + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + + if self._stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}") + + return cast( + _T, + to( + cast_type=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. StreamResponse[ChunkType]", # noqa: E501 + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_type=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + stream_cls = cast("type[StreamResponse[Any]] | None", self._client._default_stream_cls) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_type=self._cast_type, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_type = to if to is not None else self._cast_type + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_type): + cast_type = extract_type_arg(cast_type, 0) + + if cast_type is NoneType: + return cast(R, None) + + response = self.http_response + if cast_type == str: + return cast(R, response.text) + + if cast_type == int: + return cast(R, int(response.text)) + + if cast_type == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_type) or cast_type + + if inspect.isclass(origin) and issubclass(origin, HttpxResponseContent): + # in the response, e.g. mime file + *_, filename = response.headers.get("content-disposition", "").split("filename=") + # 判断文件类型是jsonl类型的使用HttpxTextBinaryResponseContent + if filename and filename.endswith(".jsonl") or filename and filename.endswith(".xlsx"): + return cast(R, HttpxTextBinaryResponseContent(response)) + else: + return cast(R, cast_type(response)) # type: ignore + + if origin == LegacyAPIResponse: + raise RuntimeError("Unexpected state - cast_type is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_type != httpx.Response: + raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`") + return cast(R, response) + + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`") + + if ( + cast_type is not object + and origin is not list + and origin is not dict + and origin is not Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501 + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_type): + try: + data = response.json() + except Exception as exc: + log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc) + else: + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501 + json_data=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() + + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + @override + def __repr__(self) -> str: + return f"" + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501 + ) + + +def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. 
+ """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], func(*args, **kwargs)) + + return wrapped diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py index ac459151fc..c3b894b3a3 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py @@ -1,48 +1,97 @@ from __future__ import annotations -from typing import Any, ClassVar, Union +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, ClassVar, Union, cast +import pydantic.generics from httpx import Timeout -from pydantic import ConfigDict -from typing_extensions import TypedDict, Unpack +from typing_extensions import Required, TypedDict, Unpack, final -from ._base_type import Body, Headers, HttpxRequestFiles, NotGiven, Query -from ._utils import remove_notgiven_indict +from ._base_compat import PYDANTIC_V2, ConfigDict +from ._base_type import AnyMapping, Body, Headers, HttpxRequestFiles, NotGiven, Query +from ._constants import RAW_RESPONSE_HEADER +from ._utils import is_given, strip_not_given class UserRequestInput(TypedDict, total=False): + headers: Headers max_retries: int timeout: float | Timeout | None + params: Query + extra_json: AnyMapping + + +class FinalRequestOptionsInput(TypedDict, total=False): + method: Required[str] + url: Required[str] + params: Query headers: Headers - params: Query | None + max_retries: int + timeout: float | Timeout | None + files: HttpxRequestFiles | None + json_data: Body + extra_json: AnyMapping -class ClientRequestParam: +@final +class FinalRequestOptions(pydantic.BaseModel): method: str url: str - max_retries: Union[int, NotGiven] = NotGiven() - timeout: Union[float, NotGiven] = NotGiven() - headers: Union[Headers, NotGiven] = NotGiven() - json_data: Union[Body, None] = None - files: Union[HttpxRequestFiles, None] = None params: Query = {} - model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) + headers: Union[Headers, NotGiven] = NotGiven() + max_retries: Union[int, NotGiven] = NotGiven() + timeout: Union[float, Timeout, None, NotGiven] = NotGiven() + files: Union[HttpxRequestFiles, None] = None + idempotency_key: Union[str, None] = None + post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven() - def get_max_retries(self, max_retries) -> int: + # It should be noted that we cannot use `json` here as that would override + # a BaseModel method in an incompatible fashion. 
+ json_data: Union[Body, None] = None + extra_json: Union[AnyMapping, None] = None + + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) + else: + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + arbitrary_types_allowed: bool = True + + def get_max_retries(self, max_retries: int) -> int: if isinstance(self.max_retries, NotGiven): return max_retries return self.max_retries + def _strip_raw_response_header(self) -> None: + if not is_given(self.headers): + return + + if self.headers.get(RAW_RESPONSE_HEADER): + self.headers = {**self.headers} + self.headers.pop(RAW_RESPONSE_HEADER) + + # override the `construct` method so that we can run custom transformations. + # this is necessary as we don't want to do any actual runtime type checking + # (which means we can't use validators) but we do want to ensure that `NotGiven` + # values are not present + # + # type ignore required because we're adding explicit types to `**values` @classmethod def construct( # type: ignore cls, _fields_set: set[str] | None = None, **values: Unpack[UserRequestInput], - ) -> ClientRequestParam: - kwargs: dict[str, Any] = {key: remove_notgiven_indict(value) for key, value in values.items()} - client = cls() - client.__dict__.update(kwargs) + ) -> FinalRequestOptions: + kwargs: dict[str, Any] = { + # we unconditionally call `strip_not_given` on any value + # as it will just ignore any non-mapping types + key: strip_not_given(value) + for key, value in values.items() + } + if PYDANTIC_V2: + return super().model_construct(_fields_set, **kwargs) + return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs)) # pyright: ignore[reportDeprecated] - return client - - model_construct = construct + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + model_construct = construct diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py index 56e60a7934..45443da662 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py @@ -1,87 +1,193 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast, get_args, get_origin +import inspect +import logging +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload import httpx import pydantic -from typing_extensions import ParamSpec +from typing_extensions import ParamSpec, override +from ._base_models import BaseModel, is_basemodel from ._base_type import NoneType -from ._sse_client import StreamResponse +from ._errors import APIResponseValidationError, ZhipuAIError +from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type +from ._utils import extract_type_arg, extract_type_var_from_base, is_annotated_type, is_given if TYPE_CHECKING: from ._http_client import HttpClient + from ._request_opt import FinalRequestOptions P = ParamSpec("P") R = TypeVar("R") +_T = TypeVar("_T") +_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]") +log: logging.Logger = logging.getLogger(__name__) -class HttpResponse(Generic[R]): +class BaseAPIResponse(Generic[R]): _cast_type: type[R] _client: HttpClient - _parsed: R | None - _enable_stream: bool + _parsed_by_type: dict[type[Any], Any] + _is_sse_stream: 
bool _stream_cls: type[StreamResponse[Any]] + _options: FinalRequestOptions http_response: httpx.Response def __init__( self, *, - raw_response: httpx.Response, + raw: httpx.Response, cast_type: type[R], client: HttpClient, - enable_stream: bool = False, + stream: bool, stream_cls: type[StreamResponse[Any]] | None = None, + options: FinalRequestOptions, ) -> None: self._cast_type = cast_type self._client = client - self._parsed = None + self._parsed_by_type = {} + self._is_sse_stream = stream self._stream_cls = stream_cls - self._enable_stream = enable_stream - self.http_response = raw_response + self._options = options + self.http_response = raw - def parse(self) -> R: - self._parsed = self._parse() - return self._parsed + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) - def _parse(self) -> R: - if self._enable_stream: - self._parsed = cast( - R, - self._stream_cls( - cast_type=cast(type, get_args(self._stream_cls)[0]), - response=self.http_response, - client=self._client, - ), - ) - return self._parsed - cast_type = self._cast_type - if cast_type is NoneType: - return cast(R, None) - http_response = self.http_response - if cast_type == str: - return cast(R, http_response.text) + if self._is_sse_stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}") - content_type, *_ = http_response.headers.get("content-type", "application/json").split(";") - origin = get_origin(cast_type) or cast_type - if content_type != "application/json": - if issubclass(origin, pydantic.BaseModel): - data = http_response.json() - return self._client._process_response_data( - data=data, - cast_type=cast_type, # type: ignore - response=http_response, + return cast( + _T, + to( + cast_type=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. 
StreamResponse[ChunkType]", # noqa: E501 + ), + response=self.http_response, + client=cast(Any, self._client), + ), ) - return http_response.text + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_type=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) - data = http_response.json() + stream_cls = cast("type[Stream[Any]] | None", self._client._default_stream_cls) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_type=self._cast_type, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_type = to if to is not None else self._cast_type + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_type): + cast_type = extract_type_arg(cast_type, 0) + + if cast_type is NoneType: + return cast(R, None) + + response = self.http_response + if cast_type == str: + return cast(R, response.text) + + if cast_type == bytes: + return cast(R, response.content) + + if cast_type == int: + return cast(R, int(response.text)) + + if cast_type == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_type) or cast_type + + # handle the legacy binary response case + if inspect.isclass(cast_type) and cast_type.__name__ == "HttpxBinaryResponseContent": + return cast(R, cast_type(response)) # type: ignore + + if origin == APIResponse: + raise RuntimeError("Unexpected state - cast_type is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_type != httpx.Response: + raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`") + return cast(R, response) + + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") + + if ( + cast_type is not object + and origin is not list + and origin is not dict + and origin is not Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501 + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. 
application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_type): + try: + data = response.json() + except Exception as exc: + log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc) + else: + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501 + json_data=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() return self._client._process_response_data( data=data, cast_type=cast_type, # type: ignore - response=http_response, + response=response, ) @property @@ -90,6 +196,7 @@ class HttpResponse(Generic[R]): @property def http_request(self) -> httpx.Request: + """Returns the httpx Request instance associated with the current response.""" return self.http_response.request @property @@ -98,24 +205,194 @@ class HttpResponse(Generic[R]): @property def url(self) -> httpx.URL: + """Returns the URL for which the request was made.""" return self.http_response.url @property def method(self) -> str: return self.http_request.method - @property - def content(self) -> bytes: - return self.http_response.content - - @property - def text(self) -> str: - return self.http_response.text - @property def http_version(self) -> str: return self.http_response.http_version @property def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" return self.http_response.elapsed + + @property + def is_closed(self) -> bool: + """Whether or not the response body has been closed. + + If this is False then there is response data that has not been read yet. + You must either fully consume the response body or call `.close()` + before discarding the response to prevent resource leaks. + """ + return self.http_response.is_closed + + @override + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_type}>" # noqa: E501 + + +class APIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... + + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. 
+ + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_type + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return self.http_response.read() + except httpx.StreamConsumed as exc: + # The default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message. + raise StreamAlreadyConsumed() from exc + + def text(self) -> str: + """Read and decode the response content into a string.""" + self.read() + return self.http_response.text + + def json(self) -> object: + """Read and decode the JSON response content.""" + self.read() + return self.http_response.json() + + def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self.http_response.close() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. + """ + yield from self.http_response.iter_bytes(chunk_size) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. + """ + yield from self.http_response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + yield from self.http_response.iter_lines() + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501 + ) + + +class StreamAlreadyConsumed(ZhipuAIError): # noqa: N818 + """ + Attempted to read or stream content, but the content has already + been streamed. + + This can happen if you use a method like `.iter_lines()` and then attempt + to read th entire response body afterwards, e.g. + + ```py + response = await client.post(...) + async for line in response.iter_lines(): + ... # do something with `line` + + content = await response.read() + # ^ error + ``` + + If you want this behaviour you'll need to either manually accumulate the response + content or call `await response.read()` before iterating over the stream. + """ + + def __init__(self) -> None: + message = ( + "Attempted to read or stream some content, but the content has " + "already been streamed. " + "This could be due to attempting to stream the response " + "content more than once." + "\n\n" + "You can fix this by manually accumulating the response content while streaming " + "or by calling `.read()` before starting to stream." 
+ ) + super().__init__(message) + + +def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type: + """Given a type like `APIResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(APIResponse[bytes]): + ... + + extract_response_type(MyResponse) -> bytes + ``` + """ + return extract_type_var_from_base( + typ, + generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse)), + index=0, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py index ec2745d059..cbc449d244 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py @@ -1,13 +1,16 @@ from __future__ import annotations +import inspect import json from collections.abc import Iterator, Mapping -from typing import TYPE_CHECKING, Generic +from typing import TYPE_CHECKING, Generic, TypeGuard, cast import httpx +from . import get_origin from ._base_type import ResponseT from ._errors import APIResponseError +from ._utils import extract_type_var_from_base, is_mapping _FIELD_SEPARATOR = ":" @@ -53,8 +56,41 @@ class StreamResponse(Generic[ResponseT]): request=self.response.request, json_data=data["error"], ) + if sse.event is None: + data = sse.json_data() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIResponseError( + message=message, + request=self.response.request, + json_data=data["error"], + ) yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response) + + else: + data = sse.json_data() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIResponseError( + message=message, + request=self.response.request, + json_data=data["error"], + ) + yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response) + for sse in iterator: pass @@ -138,3 +174,33 @@ class SSELineParser: except (TypeError, ValueError): pass return + + +def is_stream_class_type(typ: type) -> TypeGuard[type[StreamResponse[object]]]: + """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`""" + origin = get_origin(typ) or typ + return inspect.isclass(origin) and issubclass(origin, StreamResponse) + + +def extract_stream_chunk_type( + stream_cls: type, + *, + failure_message: str | None = None, +) -> type: + """Given a type like `StreamResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyStream(StreamResponse[bytes]): + ... 
+ + extract_stream_chunk_type(MyStream) -> bytes + ``` + """ + + return extract_type_var_from_base( + stream_cls, + index=0, + generic_bases=cast("tuple[type, ...]", (StreamResponse,)), + failure_message=failure_message, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py deleted file mode 100644 index 6b610567da..0000000000 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable, Mapping -from typing import TypeVar - -from ._base_type import NotGiven - - -def remove_notgiven_indict(obj): - if obj is None or (not isinstance(obj, Mapping)): - return obj - return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} - - -_T = TypeVar("_T") - - -def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: - return [item for sublist in t for item in sublist] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py new file mode 100644 index 0000000000..a66b095816 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py @@ -0,0 +1,52 @@ +from ._utils import ( # noqa: I001 + remove_notgiven_indict as remove_notgiven_indict, # noqa: PLC0414 + flatten as flatten, # noqa: PLC0414 + is_dict as is_dict, # noqa: PLC0414 + is_list as is_list, # noqa: PLC0414 + is_given as is_given, # noqa: PLC0414 + is_tuple as is_tuple, # noqa: PLC0414 + is_mapping as is_mapping, # noqa: PLC0414 + is_tuple_t as is_tuple_t, # noqa: PLC0414 + parse_date as parse_date, # noqa: PLC0414 + is_iterable as is_iterable, # noqa: PLC0414 + is_sequence as is_sequence, # noqa: PLC0414 + coerce_float as coerce_float, # noqa: PLC0414 + is_mapping_t as is_mapping_t, # noqa: PLC0414 + removeprefix as removeprefix, # noqa: PLC0414 + removesuffix as removesuffix, # noqa: PLC0414 + extract_files as extract_files, # noqa: PLC0414 + is_sequence_t as is_sequence_t, # noqa: PLC0414 + required_args as required_args, # noqa: PLC0414 + coerce_boolean as coerce_boolean, # noqa: PLC0414 + coerce_integer as coerce_integer, # noqa: PLC0414 + file_from_path as file_from_path, # noqa: PLC0414 + parse_datetime as parse_datetime, # noqa: PLC0414 + strip_not_given as strip_not_given, # noqa: PLC0414 + deepcopy_minimal as deepcopy_minimal, # noqa: PLC0414 + get_async_library as get_async_library, # noqa: PLC0414 + maybe_coerce_float as maybe_coerce_float, # noqa: PLC0414 + get_required_header as get_required_header, # noqa: PLC0414 + maybe_coerce_boolean as maybe_coerce_boolean, # noqa: PLC0414 + maybe_coerce_integer as maybe_coerce_integer, # noqa: PLC0414 + drop_prefix_image_data as drop_prefix_image_data, # noqa: PLC0414 +) + + +from ._typing import ( + is_list_type as is_list_type, # noqa: PLC0414 + is_union_type as is_union_type, # noqa: PLC0414 + extract_type_arg as extract_type_arg, # noqa: PLC0414 + is_iterable_type as is_iterable_type, # noqa: PLC0414 + is_required_type as is_required_type, # noqa: PLC0414 + is_annotated_type as is_annotated_type, # noqa: PLC0414 + strip_annotated_type as strip_annotated_type, # noqa: PLC0414 + extract_type_var_from_base as extract_type_var_from_base, # noqa: PLC0414 +) + +from ._transform import ( + PropertyInfo as PropertyInfo, # noqa: PLC0414 + transform as transform, # noqa: PLC0414 + 
async_transform as async_transform, # noqa: PLC0414 + maybe_transform as maybe_transform, # noqa: PLC0414 + async_maybe_transform as async_maybe_transform, # noqa: PLC0414 +) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py new file mode 100644 index 0000000000..e8ef1f7935 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py @@ -0,0 +1,383 @@ +from __future__ import annotations + +import base64 +import io +import pathlib +from collections.abc import Mapping +from datetime import date, datetime +from typing import Any, Literal, TypeVar, cast, get_args, get_type_hints + +import anyio +import pydantic +from typing_extensions import override + +from .._base_compat import is_typeddict, model_dump +from .._files import is_base64_file_input +from ._typing import ( + extract_type_arg, + is_annotated_type, + is_iterable_type, + is_list_type, + is_required_type, + is_union_type, + strip_annotated_type, +) +from ._utils import ( + is_iterable, + is_list, + is_mapping, +) + +_T = TypeVar("_T") + + +# TODO: support for drilling globals() and locals() +# TODO: ensure works correctly with forward references in all cases + + +PropertyFormat = Literal["iso8601", "base64", "custom"] + + +class PropertyInfo: + """Metadata class to be used in Annotated types to provide information about a given type. + + For example: + + class MyParams(TypedDict): + account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')] + + This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API. + """ # noqa: E501 + + alias: str | None + format: PropertyFormat | None + format_template: str | None + discriminator: str | None + + def __init__( + self, + *, + alias: str | None = None, + format: PropertyFormat | None = None, + format_template: str | None = None, + discriminator: str | None = None, + ) -> None: + self.alias = alias + self.format = format + self.format_template = format_template + self.discriminator = discriminator + + @override + def __repr__(self) -> str: + return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" # noqa: E501 + + +def maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `transform()` that allows `None` to be passed. + + See `transform()` for more details. + """ + if data is None: + return None + return transform(data, expected_type) + + +# Wrapper over _transform_recursive providing fake types +def transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. 
+ """ + transformed = _transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +def _get_annotated_type(type_: type) -> type | None: + """If the given type is an `Annotated` type then it is returned, if not `None` is returned. + + This also unwraps the type when applicable, e.g. `Required[Annotated[T, ...]]` + """ + if is_required_type(type_): + # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]` + type_ = get_args(type_)[0] + + if is_annotated_type(type_): + return type_ + + return None + + +def _maybe_transform_key(key: str, type_: type) -> str: + """Transform the given `data` based on the annotations provided in `type_`. + + Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + """ + annotated_type = _get_annotated_type(type_) + if annotated_type is None: + # no `Annotated` definition for this type, no transformation needed + return key + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.alias is not None: + return annotation.alias + + return key + + +def _transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return _transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. 
+ for subtype in get_args(stripped_type): + data = _transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return _format_data(data, annotation.format, annotation.format_template) + + return data + + +def _format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, date | datetime): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = data.read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +def _transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_) + return result + + +async def async_maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `async_transform()` that allows `None` to be passed. + + See `async_transform()` for more details. + """ + if data is None: + return None + return await async_transform(data, expected_type) + + +async def async_transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +async def _async_transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. 
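The `format` metadata is what lets request params carry `datetime` objects; a short sketch of the two non-binary formats (same `PYTHONPATH` assumption, hypothetical field names):

```python
from datetime import datetime
from typing import Annotated

from typing_extensions import TypedDict

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._transform import (
    PropertyInfo,
    transform,
)


class ReportParams(TypedDict, total=False):
    # serialized with datetime.isoformat()
    created_at: Annotated[datetime, PropertyInfo(format="iso8601")]
    # aliased and serialized with strftime(format_template)
    day: Annotated[datetime, PropertyInfo(alias="reportDay", format="custom", format_template="%Y-%m-%d")]


print(transform({"created_at": datetime(2024, 7, 1, 12, 30), "day": datetime(2024, 7, 1)}, ReportParams))
# {'created_at': '2024-07-01T12:30:00', 'reportDay': '2024-07-01'}
```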
In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return await _async_transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. + for subtype in get_args(stripped_type): + data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return await _async_format_data(data, annotation.format, annotation.format_template) + + return data + + +async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, date | datetime): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = await anyio.Path(data).read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +async def _async_transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) + return result diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py new file mode 100644 index 0000000000..c7c54dcc37 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from collections import abc as _c_abc +from collections.abc import Iterable +from typing import Annotated, Any, 
TypeVar, cast, get_args, get_origin + +from typing_extensions import Required + +from .._base_compat import is_union as _is_union +from .._base_type import InheritsGeneric + + +def is_annotated_type(typ: type) -> bool: + return get_origin(typ) == Annotated + + +def is_list_type(typ: type) -> bool: + return (get_origin(typ) or typ) == list + + +def is_iterable_type(typ: type) -> bool: + """If the given type is `typing.Iterable[T]`""" + origin = get_origin(typ) or typ + return origin in {Iterable, _c_abc.Iterable} + + +def is_union_type(typ: type) -> bool: + return _is_union(get_origin(typ)) + + +def is_required_type(typ: type) -> bool: + return get_origin(typ) == Required + + +def is_typevar(typ: type) -> bool: + # type ignore is required because type checkers + # think this expression will always return False + return type(typ) == TypeVar # type: ignore + + +# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] +def strip_annotated_type(typ: type) -> type: + if is_required_type(typ) or is_annotated_type(typ): + return strip_annotated_type(cast(type, get_args(typ)[0])) + + return typ + + +def extract_type_arg(typ: type, index: int) -> type: + args = get_args(typ) + try: + return cast(type, args[index]) + except IndexError as err: + raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err + + +def extract_type_var_from_base( + typ: type, + *, + generic_bases: tuple[type, ...], + index: int, + failure_message: str | None = None, +) -> type: + """Given a type like `Foo[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(Foo[bytes]): + ... + + extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes + ``` + + And where a generic subclass is given: + ```py + _T = TypeVar('_T') + class MyResponse(Foo[_T]): + ... + + extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes + ``` + """ + cls = cast(object, get_origin(typ) or typ) + if cls in generic_bases: + # we're given the class directly + return extract_type_arg(typ, index) + + # if a subclass is given + # --- + # this is needed as __orig_bases__ is not present in the typeshed stubs + # because it is intended to be for internal use only, however there does + # not seem to be a way to resolve generic TypeVars for inherited subclasses + # without using it. + if isinstance(cls, InheritsGeneric): + target_base_class: Any | None = None + for base in cls.__orig_bases__: + if base.__origin__ in generic_bases: + target_base_class = base + break + + if target_base_class is None: + raise RuntimeError( + "Could not find the generic base class;\n" + "This should never happen;\n" + f"Does {cls} inherit from one of {generic_bases} ?" + ) + + extracted = extract_type_arg(target_base_class, index) + if is_typevar(extracted): + # If the extracted type argument is itself a type variable + # then that means the subclass itself is generic, so we have + # to resolve the type argument from the class itself, not + # the base class. + # + # Note: if there is more than 1 type argument, the subclass could + # change the ordering of the type arguments, this is not currently + # supported. 
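A few sanity checks for the introspection helpers in `_typing.py` (a sketch; assumes the `api` directory is on `PYTHONPATH`):

```python
from typing import Annotated, Union

from typing_extensions import Required

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._typing import (
    extract_type_arg,
    is_list_type,
    is_union_type,
    strip_annotated_type,
)

assert is_list_type(list[int])
assert is_union_type(Union[int, str])
# Required[...] and Annotated[...] wrappers are peeled off recursively
assert strip_annotated_type(Required[Annotated[str, "metadata"]]) is str
assert extract_type_arg(list[int], 0) is int
# extract_type_arg(list[int], 1) would raise RuntimeError (no type argument at index 1)
```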
+ return extract_type_arg(typ, index) + + return extracted + + raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}") diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py new file mode 100644 index 0000000000..ce5e7786aa --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py @@ -0,0 +1,409 @@ +from __future__ import annotations + +import functools +import inspect +import os +import re +from collections.abc import Callable, Iterable, Mapping, Sequence +from pathlib import Path +from typing import ( + Any, + TypeGuard, + TypeVar, + Union, + cast, + overload, +) + +import sniffio + +from .._base_compat import parse_date as parse_date # noqa: PLC0414 +from .._base_compat import parse_datetime as parse_datetime # noqa: PLC0414 +from .._base_type import FileTypes, Headers, HeadersLike, NotGiven, NotGivenOr + + +def remove_notgiven_indict(obj): + if obj is None or (not isinstance(obj, Mapping)): + return obj + return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} + + +_T = TypeVar("_T") +_TupleT = TypeVar("_TupleT", bound=tuple[object, ...]) +_MappingT = TypeVar("_MappingT", bound=Mapping[str, object]) +_SequenceT = TypeVar("_SequenceT", bound=Sequence[object]) +CallableT = TypeVar("CallableT", bound=Callable[..., Any]) + + +def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: + return [item for sublist in t for item in sublist] + + +def extract_files( + # TODO: this needs to take Dict but variance issues..... + # create protocol type ? + query: Mapping[str, object], + *, + paths: Sequence[Sequence[str]], +) -> list[tuple[str, FileTypes]]: + """Recursively extract files from the given dictionary based on specified paths. + + A path may look like this ['foo', 'files', '', 'data']. + + Note: this mutates the given dictionary. + """ + files: list[tuple[str, FileTypes]] = [] + for path in paths: + files.extend(_extract_items(query, path, index=0, flattened_key=None)) + return files + + +def _extract_items( + obj: object, + path: Sequence[str], + *, + index: int, + flattened_key: str | None, +) -> list[tuple[str, FileTypes]]: + try: + key = path[index] + except IndexError: + if isinstance(obj, NotGiven): + # no value was provided - we can safely ignore + return [] + + # cyclical import + from .._files import assert_is_file_content + + # We have exhausted the path, return the entry we found. + assert_is_file_content(obj, key=flattened_key) + assert flattened_key is not None + return [(flattened_key, cast(FileTypes, obj))] + + index += 1 + if is_dict(obj): + try: + # We are at the last entry in the path so we must remove the field + if (len(path)) == index: + item = obj.pop(key) + else: + item = obj[key] + except KeyError: + # Key was not present in the dictionary, this is not indicative of an error + # as the given path may not point to a required field. We also do not want + # to enforce required fields as the API may differ from the spec in some cases. 
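`extract_files` is how file payloads get lifted out of a JSON-style body so they can be sent as multipart parts; a small sketch with a hypothetical body (note that the dict is mutated in place):

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._utils import extract_files

body = {"purpose": "retrieval", "upload": {"file": b"raw bytes"}}
files = extract_files(body, paths=[["upload", "file"]])

print(files)  # [('upload[file]', b'raw bytes')]
print(body)   # {'purpose': 'retrieval', 'upload': {}}  (the file entry was popped)
```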
+ return [] + if flattened_key is None: + flattened_key = key + else: + flattened_key += f"[{key}]" + return _extract_items( + item, + path, + index=index, + flattened_key=flattened_key, + ) + elif is_list(obj): + if key != "": + return [] + + return flatten( + [ + _extract_items( + item, + path, + index=index, + flattened_key=flattened_key + "[]" if flattened_key is not None else "[]", + ) + for item in obj + ] + ) + + # Something unexpected was passed, just ignore it. + return [] + + +def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]: + return not isinstance(obj, NotGiven) + + +# Type safe methods for narrowing types with TypeVars. +# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], +# however this cause Pyright to rightfully report errors. As we know we don't +# care about the contained types we can safely use `object` in it's place. +# +# There are two separate functions defined, `is_*` and `is_*_t` for different use cases. +# `is_*` is for when you're dealing with an unknown input +# `is_*_t` is for when you're narrowing a known union type to a specific subset + + +def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]: + return isinstance(obj, tuple) + + +def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]: + return isinstance(obj, tuple) + + +def is_sequence(obj: object) -> TypeGuard[Sequence[object]]: + return isinstance(obj, Sequence) + + +def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]: + return isinstance(obj, Sequence) + + +def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]: + return isinstance(obj, Mapping) + + +def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]: + return isinstance(obj, Mapping) + + +def is_dict(obj: object) -> TypeGuard[dict[object, object]]: + return isinstance(obj, dict) + + +def is_list(obj: object) -> TypeGuard[list[object]]: + return isinstance(obj, list) + + +def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: + return isinstance(obj, Iterable) + + +def deepcopy_minimal(item: _T) -> _T: + """Minimal reimplementation of copy.deepcopy() that will only copy certain object types: + + - mappings, e.g. `dict` + - list + + This is done for performance reasons. + """ + if is_mapping(item): + return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()}) + if is_list(item): + return cast(_T, [deepcopy_minimal(entry) for entry in item]) + return item + + +# copied from https://github.com/Rapptz/RoboDanny +def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str: + size = len(seq) + if size == 0: + return "" + + if size == 1: + return seq[0] + + if size == 2: + return f"{seq[0]} {final} {seq[1]}" + + return delim.join(seq[:-1]) + f" {final} {seq[-1]}" + + +def quote(string: str) -> str: + """Add single quotation marks around the given string. Does *not* do any escaping.""" + return f"'{string}'" + + +def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: + """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function. + + Useful for enforcing runtime validation of overloaded functions. + + Example usage: + ```py + @overload + def foo(*, a: str) -> str: + ... + + + @overload + def foo(*, b: bool) -> str: + ... + + + # This enforces the same constraints that a static type checker would + # i.e. that either a or b must be passed to the function + @required_args(["a"], ["b"]) + def foo(*, a: str | None = None, b: bool | None = None) -> str: + ... 
+ ``` + """ + + def inner(func: CallableT) -> CallableT: + params = inspect.signature(func).parameters + positional = [ + name + for name, param in params.items() + if param.kind + in { + param.POSITIONAL_ONLY, + param.POSITIONAL_OR_KEYWORD, + } + ] + + @functools.wraps(func) + def wrapper(*args: object, **kwargs: object) -> object: + given_params: set[str] = set() + for i, _ in enumerate(args): + try: + given_params.add(positional[i]) + except IndexError: + raise TypeError( + f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given" + ) from None + + given_params.update(kwargs.keys()) + + for variant in variants: + matches = all(param in given_params for param in variant) + if matches: + break + else: # no break + if len(variants) > 1: + variations = human_join( + ["(" + human_join([quote(arg) for arg in variant], final="and") + ")" for variant in variants] + ) + msg = f"Missing required arguments; Expected either {variations} arguments to be given" + else: + # TODO: this error message is not deterministic + missing = list(set(variants[0]) - given_params) + if len(missing) > 1: + msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}" + else: + msg = f"Missing required argument: {quote(missing[0])}" + raise TypeError(msg) + return func(*args, **kwargs) + + return wrapper # type: ignore + + return inner + + +_K = TypeVar("_K") +_V = TypeVar("_V") + + +@overload +def strip_not_given(obj: None) -> None: ... + + +@overload +def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ... + + +@overload +def strip_not_given(obj: object) -> object: ... + + +def strip_not_given(obj: object | None) -> object: + """Remove all top-level keys where their values are instances of `NotGiven`""" + if obj is None: + return None + + if not is_mapping(obj): + return obj + + return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} + + +def coerce_integer(val: str) -> int: + return int(val, base=10) + + +def coerce_float(val: str) -> float: + return float(val) + + +def coerce_boolean(val: str) -> bool: + return val in {"true", "1", "on"} + + +def maybe_coerce_integer(val: str | None) -> int | None: + if val is None: + return None + return coerce_integer(val) + + +def maybe_coerce_float(val: str | None) -> float | None: + if val is None: + return None + return coerce_float(val) + + +def maybe_coerce_boolean(val: str | None) -> bool | None: + if val is None: + return None + return coerce_boolean(val) + + +def removeprefix(string: str, prefix: str) -> str: + """Remove a prefix from a string. + + Backport of `str.removeprefix` for Python < 3.9 + """ + if string.startswith(prefix): + return string[len(prefix) :] + return string + + +def removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. 
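Small usage notes for the `NotGiven` and coercion helpers (a sketch; `NotGiven()` is instantiated directly here for illustration, the SDK code normally passes around a shared sentinel):

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._base_type import NotGiven
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._utils import (
    coerce_boolean,
    maybe_coerce_integer,
    strip_not_given,
)

headers = {"x-request-id": "abc123", "x-trace": NotGiven()}
print(strip_not_given(headers))    # {'x-request-id': 'abc123'}
print(coerce_boolean("true"))      # True  ("true", "1" and "on" count as truthy)
print(maybe_coerce_integer("42"))  # 42
print(maybe_coerce_integer(None))  # None
```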
+ + Backport of `str.removesuffix` for Python < 3.9 + """ + if string.endswith(suffix): + return string[: -len(suffix)] + return string + + +def file_from_path(path: str) -> FileTypes: + contents = Path(path).read_bytes() + file_name = os.path.basename(path) + return (file_name, contents) + + +def get_required_header(headers: HeadersLike, header: str) -> str: + lower_header = header.lower() + if isinstance(headers, Mapping): + headers = cast(Headers, headers) + for k, v in headers.items(): + if k.lower() == lower_header and isinstance(v, str): + return v + + """ to deal with the case where the header looks like Stainless-Event-Id """ + intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize()) + + for normalized_header in [header, lower_header, header.upper(), intercaps_header]: + value = headers.get(normalized_header) + if value: + return value + + raise ValueError(f"Could not find {header} header") + + +def get_async_library() -> str: + try: + return sniffio.current_async_library() + except Exception: + return "false" + + +def drop_prefix_image_data(content: Union[str, list[dict]]) -> Union[str, list[dict]]: + """ + 删除 ;base64, 前缀 + :param image_data: + :return: + """ + if isinstance(content, list): + for data in content: + if data.get("type") == "image_url": + image_data = data.get("image_url").get("url") + if image_data.startswith("data:image/"): + image_data = image_data.split("base64,")[-1] + data["image_url"]["url"] = image_data + + return content diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py new file mode 100644 index 0000000000..e5fce94c00 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py @@ -0,0 +1,78 @@ +import logging +import os +import time + +logger = logging.getLogger(__name__) + + +class LoggerNameFilter(logging.Filter): + def filter(self, record): + # return record.name.startswith("loom_core") or record.name in "ERROR" or ( + # record.name.startswith("uvicorn.error") + # and record.getMessage().startswith("Uvicorn running on") + # ) + return True + + +def get_log_file(log_path: str, sub_dir: str): + """ + sub_dir should contain a timestamp. + """ + log_dir = os.path.join(log_path, sub_dir) + # Here should be creating a new directory each time, so `exist_ok=False` + os.makedirs(log_dir, exist_ok=False) + return os.path.join(log_dir, "zhipuai.log") + + +def get_config_dict(log_level: str, log_file_path: str, log_backup_count: int, log_max_bytes: int) -> dict: + # for windows, the path should be a raw string. 
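`drop_prefix_image_data` strips the `data:image/...;base64,` prefix from vision message parts before they are sent upstream; a quick sketch with a hypothetical message (assumes the `api` directory is on `PYTHONPATH`):

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core._utils._utils import drop_prefix_image_data

content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="}},
]
drop_prefix_image_data(content)  # mutates and returns the same list
print(content[1]["image_url"]["url"])  # iVBORw0KGgo=
```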
+ log_file_path = log_file_path.encode("unicode-escape").decode() if os.name == "nt" else log_file_path + log_level = log_level.upper() + config_dict = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "formatter": {"format": ("%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s")}, + }, + "filters": { + "logger_name_filter": { + "()": __name__ + ".LoggerNameFilter", + }, + }, + "handlers": { + "stream_handler": { + "class": "logging.StreamHandler", + "formatter": "formatter", + "level": log_level, + # "stream": "ext://sys.stdout", + # "filters": ["logger_name_filter"], + }, + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "formatter": "formatter", + "level": log_level, + "filename": log_file_path, + "mode": "a", + "maxBytes": log_max_bytes, + "backupCount": log_backup_count, + "encoding": "utf8", + }, + }, + "loggers": { + "loom_core": { + "handlers": ["stream_handler", "file_handler"], + "level": log_level, + "propagate": False, + } + }, + "root": { + "level": log_level, + "handlers": ["stream_handler", "file_handler"], + }, + } + return config_dict + + +def get_timestamp_ms(): + t = time.time() + return int(round(t * 1000)) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py new file mode 100644 index 0000000000..7f0b1b91d9 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Any, Generic, Optional, TypeVar, cast + +from typing_extensions import Protocol, override, runtime_checkable + +from ._http_client import BasePage, BaseSyncPage, PageInfo + +__all__ = ["SyncPage", "SyncCursorPage"] + +_T = TypeVar("_T") + + +@runtime_checkable +class CursorPageItem(Protocol): + id: Optional[str] + + +class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" + + data: list[_T] + object: str + + @override + def _get_page_items(self) -> list[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> None: + """ + This page represents a response that isn't actually paginated at the API level + so there will never be a next page. 
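The logging helpers are meant to be fed straight into `logging.config.dictConfig`; a wiring sketch (log directory and level are hypothetical, and `get_log_file` expects a fresh sub-directory per run, hence the timestamp):

```python
import logging.config

from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.core.logs import (
    get_config_dict,
    get_log_file,
    get_timestamp_ms,
)

log_file = get_log_file(log_path="/tmp/zhipuai-logs", sub_dir=f"run_{get_timestamp_ms()}")
logging.config.dictConfig(
    get_config_dict(
        log_level="info",  # upper-cased inside get_config_dict
        log_file_path=log_file,
        log_backup_count=3,
        log_max_bytes=10 * 1024 * 1024,
    )
)
logging.getLogger("loom_core").info("logging configured")
```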
+ """ + return None + + +class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + data: list[_T] + + @override + def _get_page_items(self) -> list[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> Optional[PageInfo]: + data = self.data + if not data: + return None + + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: + # TODO emit warning log + return None + + return PageInfo(params={"after": item.id}) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py new file mode 100644 index 0000000000..9f941fb91c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py @@ -0,0 +1,5 @@ +from .assistant_completion import AssistantCompletion + +__all__ = [ + "AssistantCompletion", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py new file mode 100644 index 0000000000..cbfb6edaeb --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py @@ -0,0 +1,40 @@ +from typing import Any, Optional + +from ...core import BaseModel +from .message import MessageContent + +__all__ = ["AssistantCompletion", "CompletionUsage"] + + +class ErrorInfo(BaseModel): + code: str # 错误码 + message: str # 错误信息 + + +class AssistantChoice(BaseModel): + index: int # 结果下标 + delta: MessageContent # 当前会话输出消息体 + finish_reason: str + """ + # 推理结束原因 stop代表推理自然结束或触发停止词。 sensitive 代表模型推理内容被安全审核接口拦截。请注意,针对此类内容,请用户自行判断并决定是否撤回已公开的内容。 + # network_error 代表模型推理服务异常。 + """ # noqa: E501 + metadata: dict # 元信息,拓展字段 + + +class CompletionUsage(BaseModel): + prompt_tokens: int # 输入的 tokens 数量 + completion_tokens: int # 输出的 tokens 数量 + total_tokens: int # 总 tokens 数量 + + +class AssistantCompletion(BaseModel): + id: str # 请求 ID + conversation_id: str # 会话 ID + assistant_id: str # 智能体 ID + created: int # 请求创建时间,Unix 时间戳 + status: str # 返回状态,包括:`completed` 表示生成结束`in_progress`表示生成中 `failed` 表示生成异常 + last_error: Optional[ErrorInfo] # 异常信息 + choices: list[AssistantChoice] # 增量返回的信息 + metadata: Optional[dict[str, Any]] # 元信息,拓展字段 + usage: Optional[CompletionUsage] # tokens 数量统计 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py new file mode 100644 index 0000000000..03f14f4238 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py @@ -0,0 +1,7 @@ +from typing import TypedDict + + +class ConversationParameters(TypedDict, total=False): + assistant_id: str # 智能体 ID + page: int # 当前分页 + page_size: int # 分页数量 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py new file mode 100644 index 0000000000..d1833d220a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py @@ -0,0 +1,29 @@ +from ...core import BaseModel + +__all__ = ["ConversationUsageListResp"] + + +class 
Usage(BaseModel): + prompt_tokens: int # 用户输入的 tokens 数量 + completion_tokens: int # 模型输入的 tokens 数量 + total_tokens: int # 总 tokens 数量 + + +class ConversationUsage(BaseModel): + id: str # 会话 id + assistant_id: str # 智能体Assistant id + create_time: int # 创建时间 + update_time: int # 更新时间 + usage: Usage # 会话中 tokens 数量统计 + + +class ConversationUsageList(BaseModel): + assistant_id: str # 智能体id + has_more: bool # 是否还有更多页 + conversation_list: list[ConversationUsage] # 返回的 + + +class ConversationUsageListResp(BaseModel): + code: int + msg: str + data: ConversationUsageList diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py new file mode 100644 index 0000000000..2def1025cd --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py @@ -0,0 +1,32 @@ +from typing import Optional, TypedDict, Union + + +class AssistantAttachments: + file_id: str + + +class MessageTextContent: + type: str # 目前支持 type = text + text: str + + +MessageContent = Union[MessageTextContent] + + +class ConversationMessage(TypedDict): + """会话消息体""" + + role: str # 用户的输入角色,例如 'user' + content: list[MessageContent] # 会话消息体的内容 + + +class AssistantParameters(TypedDict, total=False): + """智能体参数类""" + + assistant_id: str # 智能体 ID + conversation_id: Optional[str] # 会话 ID,不传则创建新会话 + model: str # 模型名称,默认为 'GLM-4-Assistant' + stream: bool # 是否支持流式 SSE,需要传入 True + messages: list[ConversationMessage] # 会话消息体 + attachments: Optional[list[AssistantAttachments]] # 会话指定的文件,非必填 + metadata: Optional[dict] # 元信息,拓展字段,非必填 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py new file mode 100644 index 0000000000..0709cdbcad --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py @@ -0,0 +1,21 @@ +from ...core import BaseModel + +__all__ = ["AssistantSupportResp"] + + +class AssistantSupport(BaseModel): + assistant_id: str # 智能体的 Assistant id,用于智能体会话 + created_at: int # 创建时间 + updated_at: int # 更新时间 + name: str # 智能体名称 + avatar: str # 智能体头像 + description: str # 智能体描述 + status: str # 智能体状态,目前只有 publish + tools: list[str] # 智能体支持的工具名 + starter_prompts: list[str] # 智能体启动推荐的 prompt + + +class AssistantSupportResp(BaseModel): + code: int + msg: str + data: list[AssistantSupport] # 智能体列表 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py new file mode 100644 index 0000000000..562e0151e5 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py @@ -0,0 +1,3 @@ +from .message_content import MessageContent + +__all__ = ["MessageContent"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py new file mode 100644 index 0000000000..6a1a438a6f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py @@ -0,0 +1,13 @@ +from typing import Annotated, TypeAlias, Union + +from 
....core._utils import PropertyInfo +from .text_content_block import TextContentBlock +from .tools_delta_block import ToolsDeltaBlock + +__all__ = ["MessageContent"] + + +MessageContent: TypeAlias = Annotated[ + Union[ToolsDeltaBlock, TextContentBlock], + PropertyInfo(discriminator="type"), +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py new file mode 100644 index 0000000000..865fb1139e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py @@ -0,0 +1,14 @@ +from typing import Literal + +from ....core import BaseModel + +__all__ = ["TextContentBlock"] + + +class TextContentBlock(BaseModel): + content: str + + role: str = "assistant" + + type: Literal["content"] = "content" + """Always `content`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py new file mode 100644 index 0000000000..9d569b282e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py @@ -0,0 +1,27 @@ +from typing import Literal + +__all__ = ["CodeInterpreterToolBlock"] + +from .....core import BaseModel + + +class CodeInterpreterToolOutput(BaseModel): + """代码工具输出结果""" + + type: str # 代码执行日志,目前只有 logs + logs: str # 代码执行的日志结果 + error_msg: str # 错误信息 + + +class CodeInterpreter(BaseModel): + """代码解释器""" + + input: str # 生成的代码片段,输入给代码沙盒 + outputs: list[CodeInterpreterToolOutput] # 代码执行后的输出结果 + + +class CodeInterpreterToolBlock(BaseModel): + """代码工具块""" + + code_interpreter: CodeInterpreter # 代码解释器对象 + type: Literal["code_interpreter"] # 调用工具的类型,始终为 `code_interpreter` diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py new file mode 100644 index 0000000000..0b6895556b --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py @@ -0,0 +1,21 @@ +from typing import Literal + +from .....core import BaseModel + +__all__ = ["DrawingToolBlock"] + + +class DrawingToolOutput(BaseModel): + image: str + + +class DrawingTool(BaseModel): + input: str + outputs: list[DrawingToolOutput] + + +class DrawingToolBlock(BaseModel): + drawing_tool: DrawingTool + + type: Literal["drawing_tool"] + """Always `drawing_tool`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py new file mode 100644 index 0000000000..c439bc4b3f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py @@ -0,0 +1,22 @@ +from typing import Literal, Union + +__all__ = ["FunctionToolBlock"] + +from .....core import BaseModel + + +class FunctionToolOutput(BaseModel): + content: str + + +class FunctionTool(BaseModel): + name: str + arguments: Union[str, dict] + outputs: list[FunctionToolOutput] + + +class 
FunctionToolBlock(BaseModel): + function: FunctionTool + + type: Literal["function"] + """Always `drawing_tool`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py new file mode 100644 index 0000000000..4789e9378a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py @@ -0,0 +1,41 @@ +from typing import Literal + +from .....core import BaseModel + + +class RetrievalToolOutput(BaseModel): + """ + This class represents the output of a retrieval tool. + + Attributes: + - text (str): The text snippet retrieved from the knowledge base. + - document (str): The name of the document from which the text snippet was retrieved, returned only in intelligent configuration. + """ # noqa: E501 + + text: str + document: str + + +class RetrievalTool(BaseModel): + """ + This class represents the outputs of a retrieval tool. + + Attributes: + - outputs (List[RetrievalToolOutput]): A list of text snippets and their respective document names retrieved from the knowledge base. + """ # noqa: E501 + + outputs: list[RetrievalToolOutput] + + +class RetrievalToolBlock(BaseModel): + """ + This class represents a block for invoking the retrieval tool. + + Attributes: + - retrieval (RetrievalTool): An instance of the RetrievalTool class containing the retrieval outputs. + - type (Literal["retrieval"]): The type of tool being used, always set to "retrieval". + """ + + retrieval: RetrievalTool + type: Literal["retrieval"] + """Always `retrieval`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py new file mode 100644 index 0000000000..98544053d4 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py @@ -0,0 +1,16 @@ +from typing import Annotated, TypeAlias, Union + +from .....core._utils import PropertyInfo +from .code_interpreter_delta_block import CodeInterpreterToolBlock +from .drawing_tool_delta_block import DrawingToolBlock +from .function_delta_block import FunctionToolBlock +from .retrieval_delta_black import RetrievalToolBlock +from .web_browser_delta_block import WebBrowserToolBlock + +__all__ = ["ToolsType"] + + +ToolsType: TypeAlias = Annotated[ + Union[DrawingToolBlock, CodeInterpreterToolBlock, WebBrowserToolBlock, RetrievalToolBlock, FunctionToolBlock], + PropertyInfo(discriminator="type"), +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py new file mode 100644 index 0000000000..966e6fe0c8 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py @@ -0,0 +1,48 @@ +from typing import Literal + +from .....core import BaseModel + +__all__ = ["WebBrowserToolBlock"] + + +class WebBrowserOutput(BaseModel): + """ + This class represents the output of a web browser search result. + + Attributes: + - title (str): The title of the search result. + - link (str): The URL link to the search result's webpage. 
+ - content (str): The textual content extracted from the search result. + - error_msg (str): Any error message encountered during the search or retrieval process. + """ + + title: str + link: str + content: str + error_msg: str + + +class WebBrowser(BaseModel): + """ + This class represents the input and outputs of a web browser search. + + Attributes: + - input (str): The input query for the web browser search. + - outputs (List[WebBrowserOutput]): A list of search results returned by the web browser. + """ + + input: str + outputs: list[WebBrowserOutput] + + +class WebBrowserToolBlock(BaseModel): + """ + This class represents a block for invoking the web browser tool. + + Attributes: + - web_browser (WebBrowser): An instance of the WebBrowser class containing the search input and outputs. + - type (Literal["web_browser"]): The type of tool being used, always set to "web_browser". + """ + + web_browser: WebBrowser + type: Literal["web_browser"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py new file mode 100644 index 0000000000..781a1ab819 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py @@ -0,0 +1,16 @@ +from typing import Literal + +from ....core import BaseModel +from .tools.tools_type import ToolsType + +__all__ = ["ToolsDeltaBlock"] + + +class ToolsDeltaBlock(BaseModel): + tool_calls: list[ToolsType] + """The index of the content part in the message.""" + + role: str = "tool" + + type: Literal["tool_calls"] = "tool_calls" + """Always `tool_calls`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py new file mode 100644 index 0000000000..560562915c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py @@ -0,0 +1,82 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
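The assistant tool blocks above are ordinary pydantic models, so they can be exercised directly; a construction sketch with made-up values:

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.types.assistant.message.tools.web_browser_delta_block import (
    WebBrowser,
    WebBrowserOutput,
    WebBrowserToolBlock,
)

hit = WebBrowserOutput(
    title="Dify",
    link="https://dify.ai",
    content="LLM application development platform",
    error_msg="",
)
block = WebBrowserToolBlock(
    web_browser=WebBrowser(input="what is dify", outputs=[hit]),
    type="web_browser",  # Literal field with no default, so it must be passed
)
print(block.web_browser.outputs[0].link)  # https://dify.ai
```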
+ +import builtins +from typing import Literal, Optional + +from ..core import BaseModel +from .batch_error import BatchError +from .batch_request_counts import BatchRequestCounts + +__all__ = ["Batch", "Errors"] + + +class Errors(BaseModel): + data: Optional[list[BatchError]] = None + + object: Optional[str] = None + """这个类型,一直是`list`。""" + + +class Batch(BaseModel): + id: str + + completion_window: str + """用于执行请求的地址信息。""" + + created_at: int + """这是 Unix timestamp (in seconds) 表示的创建时间。""" + + endpoint: str + """这是ZhipuAI endpoint的地址。""" + + input_file_id: str + """标记为batch的输入文件的ID。""" + + object: Literal["batch"] + """这个类型,一直是`batch`.""" + + status: Literal[ + "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled" + ] + """batch 的状态。""" + + cancelled_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的取消时间。""" + + cancelling_at: Optional[int] = None + """Unix timestamp (in seconds) 表示发起取消的请求时间 """ + + completed_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的完成时间。""" + + error_file_id: Optional[str] = None + """这个文件id包含了执行请求失败的请求的输出。""" + + errors: Optional[Errors] = None + + expired_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的将在过期时间。""" + + expires_at: Optional[int] = None + """Unix timestamp (in seconds) 触发过期""" + + failed_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的失败时间。""" + + finalizing_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的最终时间。""" + + in_progress_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的开始处理时间。""" + + metadata: Optional[builtins.object] = None + """ + key:value形式的元数据,以便将信息存储 + 结构化格式。键的长度是64个字符,值最长512个字符 + """ + + output_file_id: Optional[str] = None + """完成请求的输出文件的ID。""" + + request_counts: Optional[BatchRequestCounts] = None + """批次中不同状态的请求计数""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py new file mode 100644 index 0000000000..3dae65ea46 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = ["BatchCreateParams"] + + +class BatchCreateParams(TypedDict, total=False): + completion_window: Required[str] + """The time frame within which the batch should be processed. + + Currently only `24h` is supported. + """ + + endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings"]] + """The endpoint to be used for all requests in the batch. + + Currently `/v1/chat/completions` and `/v1/embeddings` are supported. + """ + + input_file_id: Required[str] + """The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. 
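`BatchCreateParams` is a TypedDict, so a batch-create request body is just a dict that type-checks against it; a minimal sketch (the file id is hypothetical):

```python
from core.model_runtime.model_providers.zhipuai.zhipuai_sdk.types.batch_create_params import BatchCreateParams

params: BatchCreateParams = {
    "completion_window": "24h",          # only 24h is currently supported
    "endpoint": "/v1/chat/completions",
    "input_file_id": "file-abc123",      # id of a file uploaded with purpose "batch"
    "metadata": {"job": "nightly-eval"},
    "auto_delete_input_file": True,
}
```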
+ """ + + metadata: Optional[dict[str, str]] + """Optional custom metadata for the batch.""" + + auto_delete_input_file: Optional[bool] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py new file mode 100644 index 0000000000..f934db1978 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..core import BaseModel + +__all__ = ["BatchError"] + + +class BatchError(BaseModel): + code: Optional[str] = None + """定义的业务错误码""" + + line: Optional[int] = None + """文件中的行号""" + + message: Optional[str] = None + """关于对话文件中的错误的描述""" + + param: Optional[str] = None + """参数名称,如果有的话""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py new file mode 100644 index 0000000000..1a68167132 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after: str + """分页的游标,用于获取下一页的数据。 + + `after` 是一个指向当前页面的游标,用于获取下一页的数据。如果没有提供 `after`,则返回第一页的数据。 + list. + """ + + limit: int + """这个参数用于限制返回的结果数量。 + + Limit 用于限制返回的结果数量。默认值为 10 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py new file mode 100644 index 0000000000..ca3ccae625 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py @@ -0,0 +1,14 @@ +from ..core import BaseModel + +__all__ = ["BatchRequestCounts"] + + +class BatchRequestCounts(BaseModel): + completed: int + """这个数字表示已经完成的请求。""" + + failed: int + """这个数字表示失败的请求。""" + + total: int + """这个数字表示总的请求。""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py index a0645b0916..c1eed070f3 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py @@ -1,10 +1,9 @@ from typing import Optional -from pydantic import BaseModel - +from ...core import BaseModel from .chat_completion import CompletionChoice, CompletionUsage -__all__ = ["AsyncTaskStatus"] +__all__ = ["AsyncTaskStatus", "AsyncCompletion"] class AsyncTaskStatus(BaseModel): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py index 4b3a929a2b..1945a826cd 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py @@ -1,6 +1,6 @@ from typing import Optional -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["Completion", "CompletionUsage"] diff --git 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py index c250699741..27fad0008a 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py @@ -1,8 +1,9 @@ -from typing import Optional +from typing import Any, Optional -from pydantic import BaseModel +from ...core import BaseModel __all__ = [ + "CompletionUsage", "ChatCompletionChunk", "Choice", "ChoiceDelta", @@ -53,3 +54,4 @@ class ChatCompletionChunk(BaseModel): created: Optional[int] = None model: Optional[str] = None usage: Optional[CompletionUsage] = None + extra_json: dict[str, Any] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py new file mode 100644 index 0000000000..666b38855c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py @@ -0,0 +1,146 @@ +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = [ + "CodeGeexTarget", + "CodeGeexContext", + "CodeGeexExtra", +] + + +class CodeGeexTarget(TypedDict, total=False): + """补全的内容参数""" + + path: Optional[str] + """文件路径""" + language: Required[ + Literal[ + "c", + "c++", + "cpp", + "c#", + "csharp", + "c-sharp", + "css", + "cuda", + "dart", + "lua", + "objectivec", + "objective-c", + "objective-c++", + "python", + "perl", + "prolog", + "swift", + "lisp", + "java", + "scala", + "tex", + "jsx", + "tsx", + "vue", + "markdown", + "html", + "php", + "js", + "javascript", + "typescript", + "go", + "shell", + "rust", + "sql", + "kotlin", + "vb", + "ruby", + "pascal", + "r", + "fortran", + "lean", + "matlab", + "delphi", + "scheme", + "basic", + "assembly", + "groovy", + "abap", + "gdscript", + "haskell", + "julia", + "elixir", + "excel", + "clojure", + "actionscript", + "solidity", + "powershell", + "erlang", + "cobol", + "alloy", + "awk", + "thrift", + "sparql", + "augeas", + "cmake", + "f-sharp", + "stan", + "isabelle", + "dockerfile", + "rmarkdown", + "literate-agda", + "tcl", + "glsl", + "antlr", + "verilog", + "racket", + "standard-ml", + "elm", + "yaml", + "smalltalk", + "ocaml", + "idris", + "visual-basic", + "protocol-buffer", + "bluespec", + "applescript", + "makefile", + "tcsh", + "maple", + "systemverilog", + "literate-coffeescript", + "vhdl", + "restructuredtext", + "sas", + "literate-haskell", + "java-server-pages", + "coffeescript", + "emacs-lisp", + "mathematica", + "xslt", + "zig", + "common-lisp", + "stata", + "agda", + "ada", + ] + ] + """代码语言类型,如python""" + code_prefix: Required[str] + """补全位置的前文""" + code_suffix: Required[str] + """补全位置的后文""" + + +class CodeGeexContext(TypedDict, total=False): + """附加代码""" + + path: Required[str] + """附加代码文件的路径""" + code: Required[str] + """附加的代码内容""" + + +class CodeGeexExtra(TypedDict, total=False): + target: Required[CodeGeexTarget] + """补全的内容参数""" + contexts: Optional[list[CodeGeexContext]] + """附加代码""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py index e01f2c815f..8425b5c866 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py @@ -2,8 +2,7 @@ from __future__ import annotations from typing import Optional -from pydantic import BaseModel - +from ..core import BaseModel from .chat.chat_completion import CompletionUsage __all__ = ["Embedding", "EmbeddingsResponded"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py new file mode 100644 index 0000000000..bbaf59e4d7 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py @@ -0,0 +1,5 @@ +from .file_deleted import FileDeleted +from .file_object import FileObject, ListOfFileObject +from .upload_detail import UploadDetail + +__all__ = ["FileObject", "ListOfFileObject", "UploadDetail", "FileDeleted"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py new file mode 100644 index 0000000000..4ef93b1c05 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = ["FileCreateParams"] + +from ...core import FileTypes +from . import UploadDetail + + +class FileCreateParams(TypedDict, total=False): + file: FileTypes + """file和 upload_detail二选一必填""" + + upload_detail: list[UploadDetail] + """file和 upload_detail二选一必填""" + + purpose: Required[Literal["fine-tune", "retrieval", "batch"]] + """ + 上传文件的用途,支持 "fine-tune和 "retrieval" + retrieval支持上传Doc、Docx、PDF、Xlsx、URL类型文件,且单个文件的大小不超过 5MB。 + fine-tune支持上传.jsonl文件且当前单个文件的大小最大可为 100 MB ,文件中语料格式需满足微调指南中所描述的格式。 + """ + custom_separator: Optional[list[str]] + """ + 当 purpose 为 retrieval 且文件类型为 pdf, url, docx 时上传,切片规则默认为 `\n`。 + """ + knowledge_id: str + """ + 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。 + """ + + sentence_size: int + """ + 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py new file mode 100644 index 0000000000..a384b1a69a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py @@ -0,0 +1,13 @@ +from typing import Literal + +from ...core import BaseModel + +__all__ = ["FileDeleted"] + + +class FileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["file"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py similarity index 86% rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py index 75f76fe969..8f9d0fbb8e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py @@ -1,8 +1,8 @@ from typing import Optional -from pydantic import BaseModel +from ...core import BaseModel -__all__ = ["FileObject"] +__all__ = 
["FileObject", "ListOfFileObject"] class FileObject(BaseModel): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py new file mode 100644 index 0000000000..8f1ca5ce57 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py @@ -0,0 +1,13 @@ +from typing import Optional + +from ...core import BaseModel + + +class UploadDetail(BaseModel): + url: str + knowledge_type: int + file_name: Optional[str] = None + sentence_size: Optional[int] = None + custom_separator: Optional[list[str]] = None + callback_url: Optional[str] = None + callback_header: Optional[dict[str, str]] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py index 1d3930286b..75c7553dbe 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py @@ -1,6 +1,6 @@ from typing import Optional, Union -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["FineTuningJob", "Error", "Hyperparameters", "ListOfFineTuningJob"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py index e26b448534..f996cff114 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py @@ -1,6 +1,6 @@ from typing import Optional, Union -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["FineTuningJobEvent", "Metric", "JobEvent"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py new file mode 100644 index 0000000000..57d0d2511d --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py @@ -0,0 +1 @@ +from .fine_tuned_models import FineTunedModelsStatus diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py new file mode 100644 index 0000000000..b286a5b577 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py @@ -0,0 +1,13 @@ +from typing import ClassVar + +from ....core import PYDANTIC_V2, BaseModel, ConfigDict + +__all__ = ["FineTunedModelsStatus"] + + +class FineTunedModelsStatus(BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow", protected_namespaces=()) + request_id: str # 请求id + model_name: str # 模型名称 + delete_status: str # 删除状态 deleting(删除中), deleted (已删除) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py index b352ce0954..3bcad0acab 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Optional -from pydantic import BaseModel +from ..core import BaseModel __all__ = ["GeneratedImage", "ImagesResponded"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py new file mode 100644 index 0000000000..8c81d703e2 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py @@ -0,0 +1,8 @@ +from .knowledge import KnowledgeInfo +from .knowledge_used import KnowledgeStatistics, KnowledgeUsed + +__all__ = [ + "KnowledgeInfo", + "KnowledgeStatistics", + "KnowledgeUsed", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py new file mode 100644 index 0000000000..32e23e6dab --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py @@ -0,0 +1,8 @@ +from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessinfo + +__all__ = [ + "DocumentData", + "DocumentObject", + "DocumentSuccessinfo", + "DocumentFailedInfo", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py new file mode 100644 index 0000000000..b9a1646391 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py @@ -0,0 +1,51 @@ +from typing import Optional + +from ....core import BaseModel + +__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessinfo", "DocumentFailedInfo"] + + +class DocumentSuccessinfo(BaseModel): + documentId: Optional[str] = None + """文件id""" + filename: Optional[str] = None + """文件名称""" + + +class DocumentFailedInfo(BaseModel): + failReason: Optional[str] = None + """上传失败的原因,包括:文件格式不支持、文件大小超出限制、知识库容量已满、容量上限为 50 万字。""" + filename: Optional[str] = None + """文件名称""" + documentId: Optional[str] = None + """知识库id""" + + +class DocumentObject(BaseModel): + """文档信息""" + + successInfos: Optional[list[DocumentSuccessinfo]] = None + """上传成功的文件信息""" + failedInfos: Optional[list[DocumentFailedInfo]] = None + """上传失败的文件信息""" + + +class DocumentDataFailInfo(BaseModel): + """失败原因""" + + embedding_code: Optional[int] = ( + None # 失败码 10001:知识不可用,知识库空间已达上限 10002:知识不可用,知识库空间已达上限(字数超出限制) + ) + embedding_msg: Optional[str] = None # 失败原因 + + +class DocumentData(BaseModel): + id: str = None # 知识唯一id + custom_separator: list[str] = None # 切片规则 + sentence_size: str = None # 切片大小 + length: int = None # 文件大小(字节) + word_num: int = None # 文件字数 + name: str = None # 文件名 + url: str = None # 文件下载链接 + embedding_stat: int = None # 0:向量化中 1:向量化完成 2:向量化失败 + failInfo: Optional[DocumentDataFailInfo] = None # 失败原因 向量化失败embedding_stat=2的时候 会有此值 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py new file mode 100644 index 0000000000..509cb3a451 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py @@ -0,0 +1,29 @@ +from typing import Optional, TypedDict + +__all__ = ["DocumentEditParams"] + + +class DocumentEditParams(TypedDict): + """ + 知识参数类型定义 + + Attributes: + id (str): 知识ID + knowledge_type (int): 知识类型: + 1:文章知识: 支持pdf,url,docx + 2.问答知识-文档: 支持pdf,url,docx + 3.问答知识-表格: 支持xlsx + 4.商品库-表格: 支持xlsx + 5.自定义: 支持pdf,url,docx + custom_separator (Optional[List[str]]): 当前知识类型为自定义(knowledge_type=5)时的切片规则,默认\n + sentence_size (Optional[int]): 当前知识类型为自定义(knowledge_type=5)时的切片字数,取值范围: 20-2000,默认300 + callback_url (Optional[str]): 回调地址 + callback_header (Optional[dict]): 回调时携带的header + """ + + id: str + knowledge_type: int + custom_separator: Optional[list[str]] + sentence_size: Optional[int] + callback_url: Optional[str] + callback_header: Optional[dict[str, str]] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py new file mode 100644 index 0000000000..910c8c045e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Optional + +from typing_extensions import TypedDict + + +class DocumentListParams(TypedDict, total=False): + """ + 文件查询参数类型定义 + + Attributes: + purpose (Optional[str]): 文件用途 + knowledge_id (Optional[str]): 当文件用途为 retrieval 时,需要提供查询的知识库ID + page (Optional[int]): 页,默认1 + limit (Optional[int]): 查询文件列表数,默认10 + after (Optional[str]): 查询指定fileID之后的文件列表(当文件用途为 fine-tune 时需要) + order (Optional[str]): 排序规则,可选值['desc', 'asc'],默认desc(当文件用途为 fine-tune 时需要) + """ + + purpose: Optional[str] + knowledge_id: Optional[str] + page: Optional[int] + limit: Optional[int] + after: Optional[str] + order: Optional[str] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py new file mode 100644 index 0000000000..acae4fad9f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from ....core import BaseModel +from . 
import DocumentData + +__all__ = ["DocumentPage"] + + +class DocumentPage(BaseModel): + list: list[DocumentData] + object: str diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py new file mode 100644 index 0000000000..bc6f159eb2 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py @@ -0,0 +1,21 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["KnowledgeInfo"] + + +class KnowledgeInfo(BaseModel): + id: Optional[str] = None + """知识库唯一 id""" + embedding_id: Optional[str] = ( + None # 知识库绑定的向量化模型 见模型列表 [内部服务开放接口文档](https://lslfd0slxc.feishu.cn/docx/YauWdbBiMopV0FxB7KncPWCEn8f#H15NduiQZo3ugmxnWQFcfAHpnQ4) + ) + name: Optional[str] = None # 知识库名称 100字限制 + customer_identifier: Optional[str] = None # 用户标识 长度32位以内 + description: Optional[str] = None # 知识库描述 500字限制 + background: Optional[str] = None # 背景颜色(给枚举)'blue', 'red', 'orange', 'purple', 'sky' + icon: Optional[str] = ( + None # 知识库图标(给枚举) question: 问号、book: 书籍、seal: 印章、wrench: 扳手、tag: 标签、horn: 喇叭、house: 房子 # noqa: E501 + ) + bucket_id: Optional[str] = None # 桶id 限制32位 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py new file mode 100644 index 0000000000..c3da201727 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import TypedDict + +__all__ = ["KnowledgeBaseParams"] + + +class KnowledgeBaseParams(TypedDict): + """ + 知识库参数类型定义 + + Attributes: + embedding_id (int): 知识库绑定的向量化模型ID + name (str): 知识库名称,限制100字 + customer_identifier (Optional[str]): 用户标识,长度32位以内 + description (Optional[str]): 知识库描述,限制500字 + background (Optional[Literal['blue', 'red', 'orange', 'purple', 'sky']]): 背景颜色 + icon (Optional[Literal['question', 'book', 'seal', 'wrench', 'tag', 'horn', 'house']]): 知识库图标 + bucket_id (Optional[str]): 桶ID,限制32位 + """ + + embedding_id: int + name: str + customer_identifier: Optional[str] + description: Optional[str] + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None + bucket_id: Optional[str] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py new file mode 100644 index 0000000000..a221b28e46 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["KnowledgeListParams"] + + +class KnowledgeListParams(TypedDict, total=False): + page: int = 1 + """ 页码,默认 1,第一页 + """ + + size: int = 10 + """每页数量 默认10 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py new file mode 100644 index 0000000000..e462eddc55 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from ...core import BaseModel +from . import KnowledgeInfo + +__all__ = ["KnowledgePage"] + + +class KnowledgePage(BaseModel): + list: list[KnowledgeInfo] + object: str diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py new file mode 100644 index 0000000000..cfda709702 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py @@ -0,0 +1,21 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["KnowledgeStatistics", "KnowledgeUsed"] + + +class KnowledgeStatistics(BaseModel): + """ + 使用量统计 + """ + + word_num: Optional[int] = None + length: Optional[int] = None + + +class KnowledgeUsed(BaseModel): + used: Optional[KnowledgeStatistics] = None + """已使用量""" + total: Optional[KnowledgeStatistics] = None + """知识库总量""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py new file mode 100644 index 0000000000..c9bd60419c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py @@ -0,0 +1,3 @@ +from .sensitive_word_check import SensitiveWordCheckRequest + +__all__ = ["SensitiveWordCheckRequest"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py new file mode 100644 index 0000000000..0c37d99e65 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py @@ -0,0 +1,14 @@ +from typing import Optional + +from typing_extensions import TypedDict + + +class SensitiveWordCheckRequest(TypedDict, total=False): + type: Optional[str] + """敏感词类型,当前仅支持ALL""" + status: Optional[str] + """敏感词启用禁用状态 + 启用:ENABLE + 禁用:DISABLE + 备注:默认开启敏感词校验,如果要关闭敏感词校验,需联系商务获取对应权限,否则敏感词禁用不生效。 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py new file mode 100644 index 0000000000..62f77344ee --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py @@ -0,0 +1,9 @@ +from .web_search import ( + SearchIntent, + SearchRecommend, + SearchResult, + WebSearch, +) +from .web_search_chunk import WebSearchChunk + +__all__ = ["WebSearch", "SearchIntent", "SearchResult", "SearchRecommend", "WebSearchChunk"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py new file mode 100644 index 0000000000..b3a3b26f07 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import Optional, Union + +from typing_extensions import TypedDict + +__all__ = ["WebSearchParams"] + + +class WebSearchParams(TypedDict): + """ + 工具名:web-search-pro参数类型定义 + + 
Attributes: + :param model: str, 模型名称 + :param request_id: Optional[str], 请求ID + :param stream: Optional[bool], 是否流式 + :param messages: Union[str, List[str], List[int], object, None], + 包含历史对话上下文的内容,按照 {"role": "user", "content": "你好"} 的json 数组形式进行传参 + 当前版本仅支持 User Message 单轮对话,工具会理解User Message并进行搜索, + 请尽可能传入不带指令格式的用户原始提问,以提高搜索准确率。 + :param scope: Optional[str], 指定搜索范围,全网、学术等,默认全网 + :param location: Optional[str], 指定搜索用户地区 location 提高相关性 + :param recent_days: Optional[int],支持指定返回 N 天(1-30)更新的搜索结果 + + + """ + + model: str + request_id: Optional[str] + stream: Optional[bool] + messages: Union[str, list[str], list[int], object, None] + scope: Optional[str] = None + location: Optional[str] = None + recent_days: Optional[int] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py new file mode 100644 index 0000000000..ac9fa3821e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py @@ -0,0 +1,71 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = [ + "WebSearch", + "SearchIntent", + "SearchResult", + "SearchRecommend", +] + + +class SearchIntent(BaseModel): + index: int + # 搜索轮次,默认为 0 + query: str + # 搜索优化 query + intent: str + # 判断的意图类型 + keywords: str + # 搜索关键词 + + +class SearchResult(BaseModel): + index: int + # 搜索轮次,默认为 0 + title: str + # 标题 + link: str + # 链接 + content: str + # 内容 + icon: str + # 图标 + media: str + # 来源媒体 + refer: str + # 角标序号 [ref_1] + + +class SearchRecommend(BaseModel): + index: int + # 搜索轮次,默认为 0 + query: str + # 推荐query + + +class WebSearchMessageToolCall(BaseModel): + id: str + search_intent: Optional[SearchIntent] + search_result: Optional[SearchResult] + search_recommend: Optional[SearchRecommend] + type: str + + +class WebSearchMessage(BaseModel): + role: str + tool_calls: Optional[list[WebSearchMessageToolCall]] = None + + +class WebSearchChoice(BaseModel): + index: int + finish_reason: str + message: WebSearchMessage + + +class WebSearch(BaseModel): + created: Optional[int] = None + choices: list[WebSearchChoice] + request_id: Optional[str] = None + id: Optional[str] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py new file mode 100644 index 0000000000..7fb0e02bb5 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py @@ -0,0 +1,33 @@ +from typing import Optional + +from ...core import BaseModel +from .web_search import SearchIntent, SearchRecommend, SearchResult + +__all__ = ["WebSearchChunk"] + + +class ChoiceDeltaToolCall(BaseModel): + index: int + id: Optional[str] = None + + search_intent: Optional[SearchIntent] = None + search_result: Optional[SearchResult] = None + search_recommend: Optional[SearchRecommend] = None + type: Optional[str] = None + + +class ChoiceDelta(BaseModel): + role: Optional[str] = None + tool_calls: Optional[list[ChoiceDeltaToolCall]] = None + + +class Choice(BaseModel): + delta: ChoiceDelta + finish_reason: Optional[str] = None + index: int + + +class WebSearchChunk(BaseModel): + id: Optional[str] = None + choices: list[Choice] + created: Optional[int] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py new file mode 100644 index 0000000000..b14072b1a7 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py @@ -0,0 +1,3 @@ +from .video_object import VideoObject, VideoResult + +__all__ = ["VideoObject", "VideoResult"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py new file mode 100644 index 0000000000..f5489d708e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from typing import Optional + +from typing_extensions import TypedDict + +__all__ = ["VideoCreateParams"] + +from ..sensitive_word_check import SensitiveWordCheckRequest + + +class VideoCreateParams(TypedDict, total=False): + model: str + """模型编码""" + prompt: str + """所需视频的文本描述""" + image_url: str + """所需视频的文本描述""" + sensitive_word_check: Optional[SensitiveWordCheckRequest] + """支持 URL 或者 Base64、传入 image 奖进行图生视频 + * 图片格式: + * 图片大小:""" + request_id: str + """由用户端传参,需保证唯一性;用于区分每次请求的唯一标识,用户端不传时平台会默认生成。""" + + user_id: str + """用户端。""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py new file mode 100644 index 0000000000..85c3844d8a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py @@ -0,0 +1,30 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["VideoObject", "VideoResult"] + + +class VideoResult(BaseModel): + url: str + """视频url""" + cover_image_url: str + """预览图""" + + +class VideoObject(BaseModel): + id: Optional[str] = None + """智谱 AI 开放平台生成的任务订单号,调用请求结果接口时请使用此订单号""" + + model: str + """模型名称""" + + video_result: list[VideoResult] + """视频生成结果""" + + task_status: str + """处理状态,PROCESSING(处理中),SUCCESS(成功),FAIL(失败) + 注:处理中状态需通过查询获取结果""" + + request_id: str + """用户在客户端请求时提交的任务编号或者平台生成的任务编号""" diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index f27a0af6e0..db6ce9d9c3 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -21,8 +21,7 @@ class BaseTraceInfo(BaseModel): return None if isinstance(v, str | dict | list): return v - else: - return "" + return "" class WorkflowTraceInfo(BaseTraceInfo): diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 6f17bade97..0200f4a32d 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -176,11 +176,18 @@ class OpsTraceManager: return None app: App = db.session.query(App).filter(App.id == app_id).first() + + if app is None: + return None + app_ops_trace_config = json.loads(app.tracing) if app.tracing else None - if app_ops_trace_config is not None: - tracing_provider = app_ops_trace_config.get("tracing_provider") - else: + if app_ops_trace_config is None: + return None + + tracing_provider = app_ops_trace_config.get("tracing_provider") + + if tracing_provider is None or tracing_provider not in provider_config_map: return None # decrypt_token @@ -701,7 +708,7 @@ class TraceQueueManager: trace_task.app_id = self.app_id trace_manager_queue.put(trace_task) except Exception as e: - logging.debug(f"Error adding trace task: {e}") 
+ logging.error(f"Error adding trace task: {e}") finally: self.start_timer() @@ -720,7 +727,7 @@ class TraceQueueManager: if tasks: self.send_to_celery(tasks) except Exception as e: - logging.debug(f"Error processing trace tasks: {e}") + logging.error(f"Error processing trace tasks: {e}") def start_timer(self): global trace_manager_timer diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py index 498685b342..3cd3fb5756 100644 --- a/api/core/ops/utils.py +++ b/api/core/ops/utils.py @@ -6,12 +6,15 @@ from models.model import Message def filter_none_values(data: dict): + new_data = {} for key, value in data.items(): if value is None: continue if isinstance(value, datetime): - data[key] = value.isoformat() - return {key: value for key, value in data.items() if value is not None} + new_data[key] = value.isoformat() + else: + new_data[key] = value + return new_data def get_message_data(message_id): diff --git a/api/core/prompt/utils/extract_thread_messages.py b/api/core/prompt/utils/extract_thread_messages.py new file mode 100644 index 0000000000..e8b626499f --- /dev/null +++ b/api/core/prompt/utils/extract_thread_messages.py @@ -0,0 +1,22 @@ +from constants import UUID_NIL + + +def extract_thread_messages(messages: list[dict]) -> list[dict]: + thread_messages = [] + next_message = None + + for message in messages: + if not message.parent_message_id: + # If the message is regenerated and does not have a parent message, it is the start of a new thread + thread_messages.append(message) + break + + if not next_message: + thread_messages.append(message) + next_message = message.parent_message_id + else: + if next_message in {message.id, UUID_NIL}: + thread_messages.append(message) + next_message = message.parent_message_id + + return thread_messages diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 286ecd4c03..4603957d68 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -110,7 +110,7 @@ class DatasetRetrieval: continue # pass if dataset is not available - if dataset and dataset.available_document_count == 0 and dataset.available_document_count == 0: + if dataset and dataset.available_document_count == 0: continue available_datasets.append(dataset) @@ -468,7 +468,7 @@ class DatasetRetrieval: continue # pass if dataset is not available - if dataset and dataset.available_document_count == 0 and dataset.available_document_count == 0: + if dataset and dataset.available_document_count == 0: continue available_datasets.append(dataset) diff --git a/api/core/tools/README.md b/api/core/tools/README.md index c7ee81422e..b5d0a30d34 100644 --- a/api/core/tools/README.md +++ b/api/core/tools/README.md @@ -9,10 +9,10 @@ The tools provided for Agents and Workflows are currently divided into two categ - `Api-Based Tools` leverage third-party APIs for implementation. You don't need to code to integrate these -- simply provide interface definitions in formats like `OpenAPI` , `Swagger`, or the `OpenAI-plugin` on the front-end. 
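For orientation, a minimal sketch of what the built-in side of that split looks like in code is shown below. It reuses the `BuiltinTool` base class and the `_invoke` / `create_text_message` helpers described in the integration guides; the `CurrentTimeTool` itself is a hypothetical example for illustration, not an existing provider.

```python
from datetime import datetime, timezone
from typing import Any, Union

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool


class CurrentTimeTool(BuiltinTool):
    """Hypothetical built-in tool: returns the current UTC time as text."""

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        # A built-in tool is plain Python: read the parameters the caller filled in,
        # do the work locally, and wrap the result in a ToolInvokeMessage.
        fmt = tool_parameters.get("format", "%Y-%m-%d %H:%M:%S")
        return self.create_text_message(text=datetime.now(timezone.utc).strftime(fmt))
```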
### Built-in Tool Providers -![Alt text](docs/zh_Hans/images/index/image.png) +![Alt text](docs/images/index/image.png) ### API Tool Providers -![Alt text](docs/zh_Hans/images/index/image-1.png) +![Alt text](docs/images/index/image-1.png) ## Tool Integration diff --git a/api/core/tools/README_CN.md b/api/core/tools/README_CN.md index fda5d0630c..7e18441131 100644 --- a/api/core/tools/README_CN.md +++ b/api/core/tools/README_CN.md @@ -12,10 +12,10 @@ - `Api-Based Tools` 基于API的工具,即通过调用第三方API实现的工具,`Api-Based Tool`不需要再额外定义,只需提供`OpenAPI` `Swagger` `OpenAI plugin`等接口文档即可。 ### 内置工具供应商 -![Alt text](docs/zh_Hans/images/index/image.png) +![Alt text](docs/images/index/image.png) ### API工具供应商 -![Alt text](docs/zh_Hans/images/index/image-1.png) +![Alt text](docs/images/index/image-1.png) ## 工具接入 为了实现更灵活更强大的功能,Tools提供了一系列的接口,帮助开发者快速构建想要的工具,本文作为开发者的入门指南,将会以[快速接入](./docs/zh_Hans/tool_scale_out.md)和[高级接入](./docs/zh_Hans/advanced_scale_out.md)两部分介绍如何接入工具。 diff --git a/api/core/tools/README_JP.md b/api/core/tools/README_JP.md new file mode 100644 index 0000000000..39d0bf1762 --- /dev/null +++ b/api/core/tools/README_JP.md @@ -0,0 +1,31 @@ +# Tools + +このモジュールは、Difyのエージェントアシスタントやワークフローで使用される組み込みツールを実装しています。このモジュールでは、フロントエンドのロジックを変更することなく、独自のツールを定義し表示することができます。この分離により、Difyの機能を容易に水平方向にスケールアウトできます。 + +## 機能紹介 + +エージェントとワークフロー向けに提供されるツールは、現在2つのカテゴリーに分類されています。 + +- `Built-in Tools`はDify内部で実装され、エージェントとワークフローで使用するためにハードコードされています。 +- `Api-Based Tools`はサードパーティのAPIを利用して実装されています。これらを統合するためのコーディングは不要で、フロントエンドで + `OpenAPI`, `Swagger`または`OpenAI-plugin`などの形式でインターフェース定義を提供するだけです。 + +### 組み込みツールプロバイダー + +![Alt text](docs/images/index/image.png) + +### APIツールプロバイダー + +![Alt text](docs/images/index/image-1.png) + +## ツールの統合 + +開発者が柔軟で強力なツールを構築できるよう、2つのガイドを提供しています。 + +### [クイック統合 👈🏻](./docs/ja_JP/tool_scale_out.md) + +クイック統合は、Google検索ツールの例を通じて、ツール統合の基本をすばやく理解できるようにすることを目的としています。 + +### [高度な統合 👈🏻](./docs/ja_JP/advanced_scale_out.md) + +高度な統合では、モジュールインターフェースについてより深く掘り下げ、画像生成、複数ツールの組み合わせ、異なるツール間でのパラメーター、画像、ファイルのフロー管理など、より複雑な機能の実装方法を説明します。 \ No newline at end of file diff --git a/api/core/tools/docs/en_US/tool_scale_out.md b/api/core/tools/docs/en_US/tool_scale_out.md index 121b7a5a76..1deaf04a47 100644 --- a/api/core/tools/docs/en_US/tool_scale_out.md +++ b/api/core/tools/docs/en_US/tool_scale_out.md @@ -245,4 +245,4 @@ After the above steps are completed, we can see this tool on the frontend, and i Of course, because google_search needs a credential, before using it, you also need to input your credentials on the frontend. 
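For reference, a rough sketch of how those credentials reach the tool at invocation time is shown below. It follows the provider-validation pattern from the integration guides; the key value is a placeholder, not a working credential.

```python
from core.tools.provider.builtin.google.tools.google_search import GoogleSearchTool

# Placeholder credential for illustration; a real SerpApi key is configured on the frontend.
credentials = {"serpapi_api_key": "your-serpapi-api-key"}

# fork_tool_runtime attaches the credentials to a runtime copy of the tool,
# so _invoke can read them via self.runtime.credentials["serpapi_api_key"].
messages = GoogleSearchTool().fork_tool_runtime(
    meta={"credentials": credentials}
).invoke(
    user_id="",
    tool_parameters={"query": "test", "result_type": "link"},
)
```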
-![Alt text](../zh_Hans/images/index/image-2.png) +![Alt text](../images/index/image-2.png) diff --git a/api/core/tools/docs/zh_Hans/images/index/image-1.png b/api/core/tools/docs/images/index/image-1.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image-1.png rename to api/core/tools/docs/images/index/image-1.png diff --git a/api/core/tools/docs/zh_Hans/images/index/image-2.png b/api/core/tools/docs/images/index/image-2.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image-2.png rename to api/core/tools/docs/images/index/image-2.png diff --git a/api/core/tools/docs/zh_Hans/images/index/image.png b/api/core/tools/docs/images/index/image.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image.png rename to api/core/tools/docs/images/index/image.png diff --git a/api/core/tools/docs/ja_JP/advanced_scale_out.md b/api/core/tools/docs/ja_JP/advanced_scale_out.md new file mode 100644 index 0000000000..96f843354f --- /dev/null +++ b/api/core/tools/docs/ja_JP/advanced_scale_out.md @@ -0,0 +1,283 @@ +# 高度なツール統合 + +このガイドを始める前に、Difyのツール統合プロセスの基本を理解していることを確認してください。簡単な概要については[クイック統合](./tool_scale_out.md)をご覧ください。 + +## ツールインターフェース + +より複雑なツールを迅速に構築するのを支援するため、`Tool`クラスに一連のヘルパーメソッドを定義しています。 + +### メッセージの返却 + +Difyは`テキスト`、`リンク`、`画像`、`ファイルBLOB`、`JSON`などの様々なメッセージタイプをサポートしています。以下のインターフェースを通じて、異なるタイプのメッセージをLLMとユーザーに返すことができます。 + +注意:以下のインターフェースの一部のパラメータについては、後のセクションで説明します。 + +#### 画像URL +画像のURLを渡すだけで、Difyが自動的に画像をダウンロードしてユーザーに返します。 + +```python + def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage: + """ + create an image message + + :param image: the url of the image + :param save_as: save as + :return: the image message + """ +``` + +#### リンク +リンクを返す必要がある場合は、以下のインターフェースを使用できます。 + +```python + def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage: + """ + create a link message + + :param link: the url of the link + :param save_as: save as + :return: the link message + """ +``` + +#### テキスト +テキストメッセージを返す必要がある場合は、以下のインターフェースを使用できます。 + +```python + def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage: + """ + create a text message + + :param text: the text of the message + :param save_as: save as + :return: the text message + """ +``` + +#### ファイルBLOB +画像、音声、動画、PPT、Word、Excelなどのファイルの生データを返す必要がある場合は、以下のインターフェースを使用できます。 + +- `blob` ファイルの生データ(bytes型) +- `meta` ファイルのメタデータ。ファイルの種類が分かっている場合は、`mime_type`を渡すことをお勧めします。そうでない場合、Difyはデフォルトタイプとして`octet/stream`を使用します。 + +```python + def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage: + """ + create a blob message + + :param blob: the blob + :param meta: meta + :param save_as: save as + :return: the blob message + """ +``` + +#### JSON +フォーマットされたJSONを返す必要がある場合は、以下のインターフェースを使用できます。これは通常、ワークフロー内のノード間のデータ伝送に使用されますが、エージェントモードでは、ほとんどの大規模言語モデルもJSONを読み取り、理解することができます。 + +- `object` Pythonの辞書オブジェクトで、自動的にJSONにシリアライズされます。 + +```python + def create_json_message(self, object: dict) -> ToolInvokeMessage: + """ + create a json message + """ +``` + +### ショートカットツール + +大規模モデルアプリケーションでは、以下の2つの一般的なニーズがあります: +- まず長いテキストを事前に要約し、その要約内容をLLMに渡すことで、元のテキストが長すぎてLLMが処理できない問題を防ぐ +- ツールが取得したコンテンツがリンクである場合、Webページ情報をクロールしてからLLMに返す必要がある + +開発者がこれら2つのニーズを迅速に実装できるよう、以下の2つのショートカットツールを提供しています。 + +#### テキスト要約ツール + +このツールはuser_idと要約するテキストを入力として受け取り、要約されたテキストを返します。Difyは現在のワークスペースのデフォルトモデルを使用して長文を要約します。 + +```python + def summary(self, user_id: str, content: str) -> str: + """ + summary the 
content + + :param user_id: the user id + :param content: the content + :return: the summary + """ +``` + +#### Webページクローリングツール + +このツールはクロールするWebページのリンクとユーザーエージェント(空でも可)を入力として受け取り、そのWebページの情報を含む文字列を返します。`user_agent`はオプションのパラメータで、ツールを識別するために使用できます。渡さない場合、Difyはデフォルトの`user_agent`を使用します。 + +```python + def get_url(self, url: str, user_agent: str = None) -> str: + """ + get url from the crawled result + """ +``` + +### 変数プール + +`Tool`内に変数プールを導入し、ツールの実行中に生成された変数やファイルなどを保存します。これらの変数は、ツールの実行中に他のツールが使用することができます。 + +次に、`DallE3`と`Vectorizer.AI`を例に、変数プールの使用方法を紹介します。 + +- `DallE3`は画像生成ツールで、テキストに基づいて画像を生成できます。ここでは、`DallE3`にカフェのロゴを生成させます。 +- `Vectorizer.AI`はベクター画像変換ツールで、画像をベクター画像に変換できるため、画像を無限に拡大しても品質が損なわれません。ここでは、`DallE3`が生成したPNGアイコンをベクター画像に変換し、デザイナーが実際に使用できるようにします。 + +#### DallE3 +まず、DallE3を使用します。画像を作成した後、その画像を変数プールに保存します。コードは以下の通りです: + +```python +from typing import Any, Dict, List, Union +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + +from base64 import b64decode + +from openai import OpenAI + +class DallE3Tool(BuiltinTool): + def _invoke(self, + user_id: str, + tool_parameters: Dict[str, Any], + ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + invoke tools + """ + client = OpenAI( + api_key=self.runtime.credentials['openai_api_key'], + ) + + # prompt + prompt = tool_parameters.get('prompt', '') + if not prompt: + return self.create_text_message('Please input prompt') + + # call openapi dalle3 + response = client.images.generate( + prompt=prompt, model='dall-e-3', + size='1024x1024', n=1, style='vivid', quality='standard', + response_format='b64_json' + ) + + result = [] + for image in response.data: + # Save all images to the variable pool through the save_as parameter. The variable name is self.VARIABLE_KEY.IMAGE.value. If new images are generated later, they will overwrite the previous images. 
+ result.append(self.create_blob_message(blob=b64decode(image.b64_json), + meta={ 'mime_type': 'image/png' }, + save_as=self.VARIABLE_KEY.IMAGE.value)) + + return result +``` + +ここでは画像の変数名として`self.VARIABLE_KEY.IMAGE.value`を使用していることに注意してください。開発者のツールが互いに連携できるよう、この`KEY`を定義しました。自由に使用することも、この`KEY`を使用しないこともできます。カスタムのKEYを渡すこともできます。 + +#### Vectorizer.AI +次に、Vectorizer.AIを使用して、DallE3が生成したPNGアイコンをベクター画像に変換します。ここで定義した関数を見てみましょう。コードは以下の通りです: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter +from core.tools.errors import ToolProviderCredentialValidationError + +from typing import Any, Dict, List, Union +from httpx import post +from base64 import b64decode + +class VectorizerTool(BuiltinTool): + def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) + -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + Tool invocation, the image variable name needs to be passed in from here, so that we can get the image from the variable pool + """ + + + def get_runtime_parameters(self) -> List[ToolParameter]: + """ + Override the tool parameter list, we can dynamically generate the parameter list based on the actual situation in the current variable pool, so that the LLM can generate the form based on the parameter list + """ + + + def is_tool_available(self) -> bool: + """ + Whether the current tool is available, if there is no image in the current variable pool, then we don't need to display this tool, just return False here + """ +``` + +次に、これら3つの関数を実装します: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter +from core.tools.errors import ToolProviderCredentialValidationError + +from typing import Any, Dict, List, Union +from httpx import post +from base64 import b64decode + +class VectorizerTool(BuiltinTool): + def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) + -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + invoke tools + """ + api_key_name = self.runtime.credentials.get('api_key_name', None) + api_key_value = self.runtime.credentials.get('api_key_value', None) + + if not api_key_name or not api_key_value: + raise ToolProviderCredentialValidationError('Please input api key name and value') + + # Get image_id, the definition of image_id can be found in get_runtime_parameters + image_id = tool_parameters.get('image_id', '') + if not image_id: + return self.create_text_message('Please input image id') + + # Get the image generated by DallE from the variable pool + image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE) + if not image_binary: + return self.create_text_message('Image not found, please request user to generate image firstly.') + + # Generate vector image + response = post( + 'https://vectorizer.ai/api/v1/vectorize', + files={ 'image': image_binary }, + data={ 'mode': 'test' }, + auth=(api_key_name, api_key_value), + timeout=30 + ) + + if response.status_code != 200: + raise Exception(response.text) + + return [ + self.create_text_message('the vectorized svg is saved as an image.'), + self.create_blob_message(blob=response.content, + meta={'mime_type': 'image/svg+xml'}) + ] + + def get_runtime_parameters(self) -> List[ToolParameter]: + """ + override the runtime parameters + """ + # Here, we override the tool parameter list, define the image_id, and set its option list to all images in the current variable pool. 
The configuration here is consistent with the configuration in yaml. + return [ + ToolParameter.get_simple_instance( + name='image_id', + llm_description=f'the image id that you want to vectorize, \ + and the image id should be specified in \ + {[i.name for i in self.list_default_image_variables()]}', + type=ToolParameter.ToolParameterType.SELECT, + required=True, + options=[i.name for i in self.list_default_image_variables()] + ) + ] + + def is_tool_available(self) -> bool: + # Only when there are images in the variable pool, the LLM needs to use this tool + return len(self.list_default_image_variables()) > 0 +``` + +ここで注目すべきは、実際には`image_id`を使用していないことです。このツールを呼び出す際には、デフォルトの変数プールに必ず画像があると仮定し、直接`image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE)`を使用して画像を取得しています。モデルの能力が弱い場合、開発者にもこの方法を推奨します。これにより、エラー許容度を効果的に向上させ、モデルが誤ったパラメータを渡すのを防ぐことができます。 \ No newline at end of file diff --git a/api/core/tools/docs/ja_JP/tool_scale_out.md b/api/core/tools/docs/ja_JP/tool_scale_out.md new file mode 100644 index 0000000000..a721023d00 --- /dev/null +++ b/api/core/tools/docs/ja_JP/tool_scale_out.md @@ -0,0 +1,240 @@ +# ツールの迅速な統合 + +ここでは、GoogleSearchを例にツールを迅速に統合する方法を紹介します。 + +## 1. ツールプロバイダーのyamlを準備する + +### 概要 + +このyamlファイルには、プロバイダー名、アイコン、作者などの詳細情報が含まれ、フロントエンドでの柔軟な表示を可能にします。 + +### 例 + +`core/tools/provider/builtin`の下に`google`モジュール(フォルダ)を作成し、`google.yaml`を作成します。名前はモジュール名と一致している必要があります。 + +以降、このツールに関するすべての操作はこのモジュール内で行います。 + +```yaml +identity: # ツールプロバイダーの基本情報 + author: Dify # 作者 + name: google # 名前(一意、他のプロバイダーと重複不可) + label: # フロントエンド表示用のラベル + en_US: Google # 英語ラベル + zh_Hans: Google # 中国語ラベル + description: # フロントエンド表示用の説明 + en_US: Google # 英語説明 + zh_Hans: Google # 中国語説明 + icon: icon.svg # アイコン(現在のモジュールの_assetsフォルダに配置) + tags: # タグ(フロントエンド表示用) + - search +``` + +- `identity`フィールドは必須で、ツールプロバイダーの基本情報(作者、名前、ラベル、説明、アイコンなど)が含まれます。 + - アイコンは現在のモジュールの`_assets`フォルダに配置する必要があります。[こちら](../../provider/builtin/google/_assets/icon.svg)を参照してください。 + - タグはフロントエンドでの表示に使用され、ユーザーがこのツールプロバイダーを素早く見つけるのに役立ちます。現在サポートされているすべてのタグは以下の通りです: + ```python + class ToolLabelEnum(Enum): + SEARCH = 'search' + IMAGE = 'image' + VIDEOS = 'videos' + WEATHER = 'weather' + FINANCE = 'finance' + DESIGN = 'design' + TRAVEL = 'travel' + SOCIAL = 'social' + NEWS = 'news' + MEDICAL = 'medical' + PRODUCTIVITY = 'productivity' + EDUCATION = 'education' + BUSINESS = 'business' + ENTERTAINMENT = 'entertainment' + UTILITIES = 'utilities' + OTHER = 'other' + ``` + +## 2. 
プロバイダーの認証情報を準備する + +GoogleはSerpApiが提供するAPIを使用するサードパーティツールであり、SerpApiを使用するにはAPI Keyが必要です。つまり、このツールを使用するには認証情報が必要です。一方、`wikipedia`のようなツールでは認証情報フィールドを記入する必要はありません。[こちら](../../provider/builtin/wikipedia/wikipedia.yaml)を参照してください。 + +認証情報フィールドを設定すると、以下のようになります: + +```yaml +identity: + author: Dify + name: google + label: + en_US: Google + zh_Hans: Google + description: + en_US: Google + zh_Hans: Google + icon: icon.svg +credentials_for_provider: # 認証情報フィールド + serpapi_api_key: # 認証情報フィールド名 + type: secret-input # 認証情報フィールドタイプ + required: true # 必須かどうか + label: # 認証情報フィールドラベル + en_US: SerpApi API key # 英語ラベル + zh_Hans: SerpApi API key # 中国語ラベル + placeholder: # 認証情報フィールドプレースホルダー + en_US: Please input your SerpApi API key # 英語プレースホルダー + zh_Hans: 请输入你的 SerpApi API key # 中国語プレースホルダー + help: # 認証情報フィールドヘルプテキスト + en_US: Get your SerpApi API key from SerpApi # 英語ヘルプテキスト + zh_Hans: 从 SerpApi 获取您的 SerpApi API key # 中国語ヘルプテキスト + url: https://serpapi.com/manage-api-key # 認証情報フィールドヘルプリンク +``` + +- `type`:認証情報フィールドタイプ。現在、`secret-input`、`text-input`、`select`の3種類をサポートしており、それぞれパスワード入力ボックス、テキスト入力ボックス、ドロップダウンボックスに対応します。`secret-input`の場合、フロントエンドで入力内容が隠され、バックエンドで入力内容が暗号化されます。 + +## 3. ツールのyamlを準備する + +1つのプロバイダーの下に複数のツールを持つことができ、各ツールにはyamlファイルが必要です。このファイルにはツールの基本情報、パラメータ、出力などが含まれます。 + +引き続きGoogleSearchを例に、`google`モジュールの下に`tools`モジュールを作成し、`tools/google_search.yaml`を作成します。内容は以下の通りです: + +```yaml +identity: # ツールの基本情報 + name: google_search # ツール名(一意、他のツールと重複不可) + author: Dify # 作者 + label: # フロントエンド表示用のラベル + en_US: GoogleSearch # 英語ラベル + zh_Hans: 谷歌搜索 # 中国語ラベル +description: # フロントエンド表示用の説明 + human: # フロントエンド表示用の紹介(多言語対応) + en_US: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query. + zh_Hans: 一个用于执行 Google SERP 搜索并提取片段和网页的工具。输入应该是一个搜索查询。 + llm: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query. 
# LLMに渡す紹介文。LLMがこのツールをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 +parameters: # パラメータリスト + - name: query # パラメータ名 + type: string # パラメータタイプ + required: true # 必須かどうか + label: # パラメータラベル + en_US: Query string # 英語ラベル + zh_Hans: 查询语句 # 中国語ラベル + human_description: # フロントエンド表示用の紹介(多言語対応) + en_US: used for searching + zh_Hans: 用于搜索网页内容 + llm_description: key words for searching # LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 + form: llm # フォームタイプ。llmはこのパラメータがAgentによって推論される必要があることを意味し、フロントエンドではこのパラメータは表示されません。 + - name: result_type + type: select # パラメータタイプ + required: true + options: # ドロップダウンボックスのオプション + - value: text + label: + en_US: text + zh_Hans: 文本 + - value: link + label: + en_US: link + zh_Hans: 链接 + default: link + label: + en_US: Result type + zh_Hans: 结果类型 + human_description: + en_US: used for selecting the result type, text or link + zh_Hans: 用于选择结果类型,使用文本还是链接进行展示 + form: form # フォームタイプ。formはこのパラメータが対話開始前にフロントエンドでユーザーによって入力される必要があることを意味します。 +``` + +- `identity`フィールドは必須で、ツールの基本情報(名前、作者、ラベル、説明など)が含まれます。 +- `parameters` パラメータリスト + - `name`(必須)パラメータ名。一意で、他のパラメータと重複しないようにしてください。 + - `type`(必須)パラメータタイプ。現在、`string`、`number`、`boolean`、`select`、`secret-input`の5種類をサポートしており、それぞれ文字列、数値、ブール値、ドロップダウンボックス、暗号化入力ボックスに対応します。機密情報には`secret-input`タイプの使用をお勧めします。 + - `label`(必須)パラメータラベル。フロントエンド表示用です。 + - `form`(必須)フォームタイプ。現在、`llm`と`form`の2種類をサポートしています。 + - エージェントアプリケーションでは、`llm`はこのパラメータがLLM自身によって推論されることを示し、`form`はこのツールを使用するために事前に設定できるパラメータであることを示します。 + - ワークフローアプリケーションでは、`llm`と`form`の両方がフロントエンドで入力する必要がありますが、`llm`のパラメータはツールノードの入力変数として使用されます。 + - `required` パラメータが必須かどうかを示します。 + - `llm`モードでは、パラメータが必須の場合、Agentはこのパラメータを推論する必要があります。 + - `form`モードでは、パラメータが必須の場合、ユーザーは対話開始前にフロントエンドでこのパラメータを入力する必要があります。 + - `options` パラメータオプション + - `llm`モードでは、DifyはすべてのオプションをLLMに渡し、LLMはこれらのオプションに基づいて推論できます。 + - `form`モードで、`type`が`select`の場合、フロントエンドはこれらのオプションを表示します。 + - `default` デフォルト値 + - `min` 最小値。パラメータタイプが`number`の場合に設定できます。 + - `max` 最大値。パラメータタイプが`number`の場合に設定できます。 + - `human_description` フロントエンド表示用の紹介。多言語対応です。 + - `placeholder` 入力ボックスのプロンプトテキスト。フォームタイプが`form`で、パラメータタイプが`string`、`number`、`secret-input`の場合に設定できます。多言語対応です。 + - `llm_description` LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 + +## 4. ツールコードを準備する + +ツールの設定が完了したら、ツールのロジックを実装するコードを作成します。 + +`google/tools`モジュールの下に`google_search.py`を作成し、内容は以下の通りです: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage + +from typing import Any, Dict, List, Union + +class GoogleSearchTool(BuiltinTool): + def _invoke(self, + user_id: str, + tool_parameters: Dict[str, Any], + ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + ツールを呼び出す + """ + query = tool_parameters['query'] + result_type = tool_parameters['result_type'] + api_key = self.runtime.credentials['serpapi_api_key'] + result = SerpAPI(api_key).run(query, result_type=result_type) + + if result_type == 'text': + return self.create_text_message(text=result) + return self.create_link_message(link=result) +``` + +### パラメータ +ツールの全体的なロジックは`_invoke`メソッドにあります。このメソッドは2つのパラメータ(`user_id`とtool_parameters`)を受け取り、それぞれユーザーIDとツールパラメータを表します。 + +### 戻り値 +ツールの戻り値として、1つのメッセージまたは複数のメッセージを選択できます。ここでは1つのメッセージを返しています。`create_text_message`と`create_link_message`を使用して、テキストメッセージまたはリンクメッセージを作成できます。複数のメッセージを返す場合は、リストを構築できます(例:`[self.create_text_message('msg1'), self.create_text_message('msg2')]`)。 + +## 5. 
プロバイダーコードを準備する + +最後に、プロバイダーモジュールの下にプロバイダークラスを作成し、プロバイダーの認証情報検証ロジックを実装する必要があります。認証情報の検証が失敗した場合、`ToolProviderCredentialValidationError`例外が発生します。 + +`google`モジュールの下に`google.py`を作成し、内容は以下の通りです: + +```python +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController +from core.tools.errors import ToolProviderCredentialValidationError + +from core.tools.provider.builtin.google.tools.google_search import GoogleSearchTool + +from typing import Any, Dict + +class GoogleProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: Dict[str, Any]) -> None: + try: + # 1. ここでGoogleSearchTool()を使ってGoogleSearchToolをインスタンス化する必要があります。これによりGoogleSearchToolのyaml設定が自動的に読み込まれますが、この時点では認証情報は含まれていません + # 2. 次に、fork_tool_runtimeメソッドを使用して、現在の認証情報をGoogleSearchToolに渡す必要があります + # 3. 最後に、invokeを呼び出します。パラメータはGoogleSearchToolのyamlで設定されたパラメータルールに従って渡す必要があります + GoogleSearchTool().fork_tool_runtime( + meta={ + "credentials": credentials, + } + ).invoke( + user_id='', + tool_parameters={ + "query": "test", + "result_type": "link" + }, + ) + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) +``` + +## 完了 + +以上のステップが完了すると、このツールをフロントエンドで確認し、Agentで使用することができるようになります。 + +もちろん、google_searchには認証情報が必要なため、使用する前にフロントエンドで認証情報を入力する必要があります。 + +![Alt text](../images/index/image-2.png) \ No newline at end of file diff --git a/api/core/tools/docs/zh_Hans/tool_scale_out.md b/api/core/tools/docs/zh_Hans/tool_scale_out.md index 06a8d9a4f9..ec61e4677b 100644 --- a/api/core/tools/docs/zh_Hans/tool_scale_out.md +++ b/api/core/tools/docs/zh_Hans/tool_scale_out.md @@ -234,4 +234,4 @@ class GoogleProvider(BuiltinToolProviderController): 当然,因为google_search需要一个凭据,在使用之前,还需要在前端配置它的凭据。 -![Alt text](images/index/image-2.png) +![Alt text](../images/index/image-2.png) diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.py b/api/core/tools/provider/builtin/cogview/tools/cogview3.py index 9039708588..085084ca38 100644 --- a/api/core/tools/provider/builtin/cogview/tools/cogview3.py +++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.py @@ -21,15 +21,22 @@ class CogView3Tool(BuiltinTool): ) size_mapping = { "square": "1024x1024", - "vertical": "1024x1792", - "horizontal": "1792x1024", + "vertical_768": "768x1344", + "vertical_864": "864x1152", + "horizontal_1344": "1344x768", + "horizontal_1152": "1152x864", + "widescreen_1440": "1440x720", + "tallscreen_720": "720x1440", } # prompt prompt = tool_parameters.get("prompt", "") if not prompt: return self.create_text_message("Please input prompt") - # get size - size = size_mapping[tool_parameters.get("size", "square")] + # get size key + size_key = tool_parameters.get("size", "square") + # cogview-3-plus get size + if size_key != "cogview_3": + size = size_mapping[size_key] # get n n = tool_parameters.get("n", 1) # get quality @@ -43,16 +50,29 @@ class CogView3Tool(BuiltinTool): # set extra body seed_id = tool_parameters.get("seed_id", self._generate_random_id(8)) extra_body = {"seed": seed_id} - response = client.images.generations( - prompt=prompt, - model="cogview-3", - size=size, - n=n, - extra_body=extra_body, - style=style, - quality=quality, - response_format="b64_json", - ) + # cogview-3-plus + if size_key != "cogview_3": + response = client.images.generations( + prompt=prompt, + model="cogview-3-plus", + size=size, + n=n, + extra_body=extra_body, + style=style, + quality=quality, + response_format="b64_json", + ) + # cogview-3 + else: + response = client.images.generations( + prompt=prompt, + 
model="cogview-3", + n=n, + extra_body=extra_body, + style=style, + quality=quality, + response_format="b64_json", + ) result = [] for image in response.data: result.append(self.create_image_message(image=image.url)) diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml index 1de3f599b6..9ab5c2729b 100644 --- a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml +++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml @@ -42,21 +42,46 @@ parameters: pt_BR: Image size form: form options: + - value: cogview_3 + label: + en_US: Square_cogview_3(1024x1024) + zh_Hans: 方_cogview_3(1024x1024) + pt_BR: Square_cogview_3(1024x1024) - value: square label: - en_US: Squre(1024x1024) + en_US: Square(1024x1024) zh_Hans: 方(1024x1024) - pt_BR: Squre(1024x1024) - - value: vertical + pt_BR: Square(1024x1024) + - value: vertical_768 label: - en_US: Vertical(1024x1792) - zh_Hans: 竖屏(1024x1792) - pt_BR: Vertical(1024x1792) - - value: horizontal + en_US: Vertical(768x1344) + zh_Hans: 竖屏(768x1344) + pt_BR: Vertical(768x1344) + - value: vertical_864 label: - en_US: Horizontal(1792x1024) - zh_Hans: 横屏(1792x1024) - pt_BR: Horizontal(1792x1024) + en_US: Vertical(864x1152) + zh_Hans: 竖屏(864x1152) + pt_BR: Vertical(864x1152) + - value: horizontal_1344 + label: + en_US: Horizontal(1344x768) + zh_Hans: 横屏(1344x768) + pt_BR: Horizontal(1344x768) + - value: horizontal_1152 + label: + en_US: Horizontal(1152x864) + zh_Hans: 横屏(1152x864) + pt_BR: Horizontal(1152x864) + - value: widescreen_1440 + label: + en_US: Widescreen(1440x720) + zh_Hans: 宽屏(1440x720) + pt_BR: Widescreen(1440x720) + - value: tallscreen_720 + label: + en_US: Tallscreen(720x1440) + zh_Hans: 高屏(720x1440) + pt_BR: Tallscreen(720x1440) default: square - name: n type: number diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py index b9b52c0b4d..eaa4b0d027 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py @@ -290,7 +290,7 @@ class ComfyuiStableDiffusionTool(BuiltinTool): draw_options["6"]["inputs"]["text"] = prompt draw_options["7"]["inputs"]["text"] = negative_prompt # if the model is SD3 or FLUX series, the Latent class should be corresponding to SD3 Latent - if model_type in (ModelType.SD3.name, ModelType.FLUX.name): + if model_type in {ModelType.SD3.name, ModelType.FLUX.name}: draw_options["5"]["class_type"] = "EmptySD3LatentImage" if lora_list: @@ -333,7 +333,7 @@ class ComfyuiStableDiffusionTool(BuiltinTool): break return self.create_blob_message( - blob=image, meta={"mime_type": "image/png"}, save_as=self.VARIABLE_KEY.IMAGE.value + blob=image, meta={"mime_type": "image/png"}, save_as=self.VariableKey.IMAGE.value ) except Exception as e: diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index 1db9b690ab..8342dbd13d 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -61,6 +61,9 @@ class GraphEngineThreadPool(ThreadPoolExecutor): return super().submit(fn, *args, **kwargs) + def task_done_callback(self, future): + self.submit_count -= 1 + def check_is_full(self) -> None: print(f"submit_count: {self.submit_count}, max_submit_count: {self.max_submit_count}") if self.submit_count > self.max_submit_count: @@ -177,16 
+180,20 @@ class GraphEngine: # trigger graph run success event yield GraphRunSucceededEvent(outputs=self.graph_runtime_state.outputs) + self._release_thread() except GraphRunFailedError as e: yield GraphRunFailedEvent(error=e.error) + self._release_thread() return except Exception as e: logger.exception("Unknown Error when graph running") yield GraphRunFailedEvent(error=str(e)) + self._release_thread() raise e - finally: - if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping: - del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id] + + def _release_thread(self): + if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping: + del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id] def _run( self, @@ -426,20 +433,22 @@ class GraphEngine: ): continue - futures.append( - self.thread_pool.submit( - self._run_parallel_node, - **{ - "flask_app": current_app._get_current_object(), # type: ignore[attr-defined] - "q": q, - "parallel_id": parallel_id, - "parallel_start_node_id": edge.target_node_id, - "parent_parallel_id": in_parallel_id, - "parent_parallel_start_node_id": parallel_start_node_id, - }, - ) + future = self.thread_pool.submit( + self._run_parallel_node, + **{ + "flask_app": current_app._get_current_object(), # type: ignore[attr-defined] + "q": q, + "parallel_id": parallel_id, + "parallel_start_node_id": edge.target_node_id, + "parent_parallel_id": in_parallel_id, + "parent_parallel_start_node_id": parallel_start_node_id, + }, ) + future.add_done_callback(self.thread_pool.task_done_callback) + + futures.append(future) + succeeded_count = 0 while True: try: diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 6f20745daf..01bb4e9076 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -89,6 +89,7 @@ class IterationNode(BaseNode): variable_pool=variable_pool, max_execution_steps=dify_config.WORKFLOW_MAX_EXECUTION_STEPS, max_execution_time=dify_config.WORKFLOW_MAX_EXECUTION_TIME, + thread_pool_id=self.thread_pool_id, ) start_at = datetime.now(timezone.utc).replace(tzinfo=None) diff --git a/api/core/workflow/nodes/question_classifier/template_prompts.py b/api/core/workflow/nodes/question_classifier/template_prompts.py index ce32b01aa4..4bca2d9dd4 100644 --- a/api/core/workflow/nodes/question_classifier/template_prompts.py +++ b/api/core/workflow/nodes/question_classifier/template_prompts.py @@ -2,9 +2,9 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """ ### Job Description', You are a text classification engine that analyzes text data and assigns categories based on user input or automatically determined categories. ### Task - Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.Additionally, you need to extract the key words from the text that are related to the classification. + Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification. ### Format - The input text is in the variable input_text.Categories are specified as a category list with two filed category_id and category_name in the variable categories .Classification instructions may be included to improve the classification accuracy. 
+ The input text is in the variable input_text. Categories are specified as a category list with two filed category_id and category_name in the variable categories. Classification instructions may be included to improve the classification accuracy. ### Constraint DO NOT include anything other than the JSON array in your response. ### Memory @@ -52,7 +52,7 @@ QUESTION_CLASSIFIER_COMPLETION_PROMPT = """ ### Job Description You are a text classification engine that analyzes text data and assigns categories based on user input or automatically determined categories. ### Task -Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification. +Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification. ### Format The input text is in the variable input_text. Categories are specified as a category list with two filed category_id and category_name in the variable categories. Classification instructions may be included to improve the classification accuracy. ### Constraint diff --git a/api/extensions/ext_sentry.py b/api/extensions/ext_sentry.py index c2dc736038..e255e7eb35 100644 --- a/api/extensions/ext_sentry.py +++ b/api/extensions/ext_sentry.py @@ -5,6 +5,8 @@ from sentry_sdk.integrations.celery import CeleryIntegration from sentry_sdk.integrations.flask import FlaskIntegration from werkzeug.exceptions import HTTPException +from core.model_runtime.errors.invoke import InvokeRateLimitError + def before_send(event, hint): if "exc_info" in hint: @@ -20,7 +22,13 @@ def init_app(app): sentry_sdk.init( dsn=app.config.get("SENTRY_DSN"), integrations=[FlaskIntegration(), CeleryIntegration()], - ignore_errors=[HTTPException, ValueError, openai.APIStatusError, parse_error.defaultErrorResponse], + ignore_errors=[ + HTTPException, + ValueError, + openai.APIStatusError, + InvokeRateLimitError, + parse_error.defaultErrorResponse, + ], traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 1.0), profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 1.0), environment=app.config.get("DEPLOY_ENV"), diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 5ce18b7292..1e6530f6f4 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -1,3 +1,4 @@ +import logging from collections.abc import Generator from typing import Union @@ -40,28 +41,56 @@ class Storage: self.storage_runner = LocalStorage(app=app) def save(self, filename, data): - self.storage_runner.save(filename, data) + try: + self.storage_runner.save(filename, data) + except Exception as e: + logging.exception("Failed to save file: %s", e) + raise e def load(self, filename: str, stream: bool = False) -> Union[bytes, Generator]: - if stream: - return self.load_stream(filename) - else: - return self.load_once(filename) + try: + if stream: + return self.load_stream(filename) + else: + return self.load_once(filename) + except Exception as e: + logging.exception("Failed to load file: %s", e) + raise e def load_once(self, filename: str) -> bytes: - return self.storage_runner.load_once(filename) + try: + return self.storage_runner.load_once(filename) + except Exception as e: + logging.exception("Failed to load_once file: %s", e) + raise e def load_stream(self, filename: str) 
-> Generator: - return self.storage_runner.load_stream(filename) + try: + return self.storage_runner.load_stream(filename) + except Exception as e: + logging.exception("Failed to load_stream file: %s", e) + raise e def download(self, filename, target_filepath): - self.storage_runner.download(filename, target_filepath) + try: + self.storage_runner.download(filename, target_filepath) + except Exception as e: + logging.exception("Failed to download file: %s", e) + raise e def exists(self, filename): - return self.storage_runner.exists(filename) + try: + return self.storage_runner.exists(filename) + except Exception as e: + logging.exception("Failed to check file exists: %s", e) + raise e def delete(self, filename): - return self.storage_runner.delete(filename) + try: + return self.storage_runner.delete(filename) + except Exception as e: + logging.exception("Failed to delete file: %s", e) + raise e storage = Storage() diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index 9207314fc2..3dcd88d1de 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -75,6 +75,7 @@ message_detail_fields = { "metadata": fields.Raw(attribute="message_metadata_dict"), "status": fields.String, "error": fields.String, + "parent_message_id": fields.String, } feedback_stat_fields = {"like": fields.Integer, "dislike": fields.Integer} diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index 3d2df87afb..c938097131 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -62,6 +62,7 @@ retriever_resource_fields = { message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), diff --git a/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py new file mode 100644 index 0000000000..fd957eeafb --- /dev/null +++ b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py @@ -0,0 +1,36 @@ +"""add parent_message_id to messages + +Revision ID: d57ba9ebb251 +Revises: 675b5321501b +Create Date: 2024-09-11 10:12:45.826265 + +""" +import sqlalchemy as sa +from alembic import op + +import models as models + +# revision identifiers, used by Alembic. +revision = 'd57ba9ebb251' +down_revision = '675b5321501b' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('parent_message_id', models.types.StringUUID(), nullable=True)) + + # Set parent_message_id for existing messages to uuid_nil() to distinguish them from new messages with actual parent IDs or NULLs + op.execute('UPDATE messages SET parent_message_id = uuid_nil() WHERE parent_message_id IS NULL') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.drop_column('parent_message_id') + + # ### end Alembic commands ### diff --git a/api/models/model.py b/api/models/model.py index ae0bc3210b..53940a5a16 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -710,6 +710,7 @@ class Message(db.Model): answer_tokens = db.Column(db.Integer, nullable=False, server_default=db.text("0")) answer_unit_price = db.Column(db.Numeric(10, 4), nullable=False) answer_price_unit = db.Column(db.Numeric(10, 7), nullable=False, server_default=db.text("0.001")) + parent_message_id = db.Column(StringUUID, nullable=True) provider_response_latency = db.Column(db.Float, nullable=False, server_default=db.text("0")) total_price = db.Column(db.Numeric(10, 7)) currency = db.Column(db.String(255), nullable=False) diff --git a/api/poetry.lock b/api/poetry.lock index 191db600e4..78816683d8 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -616,13 +616,13 @@ files = [ [[package]] name = "azure-core" -version = "1.30.2" +version = "1.31.0" description = "Microsoft Azure Core Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-core-1.30.2.tar.gz", hash = "sha256:a14dc210efcd608821aa472d9fb8e8d035d29b68993819147bc290a8ac224472"}, - {file = "azure_core-1.30.2-py3-none-any.whl", hash = "sha256:cf019c1ca832e96274ae85abd3d9f752397194d9fea3b41487290562ac8abe4a"}, + {file = "azure_core-1.31.0-py3-none-any.whl", hash = "sha256:22954de3777e0250029360ef31d80448ef1be13b80a459bff80ba7073379e2cd"}, + {file = "azure_core-1.31.0.tar.gz", hash = "sha256:656a0dd61e1869b1506b7c6a3b31d62f15984b1a573d6326f6aa2f3e4123284b"}, ] [package.dependencies] @@ -828,13 +828,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.17" +version = "1.35.19" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.17-py3-none-any.whl", hash = "sha256:a93f773ca93139529b5d36730b382dbee63ab4c7f26129aa5c84835255ca999d"}, - {file = "botocore-1.35.17.tar.gz", hash = "sha256:0d35d03ea647b5d464c7f77bdab6fb23ae5d49752b13cf97ab84444518c7b1bd"}, + {file = "botocore-1.35.19-py3-none-any.whl", hash = "sha256:c83f7f0cacfe7c19b109b363ebfa8736e570d24922f16ed371681f58ebab44a9"}, + {file = "botocore-1.35.19.tar.gz", hash = "sha256:42d6d8db7250cbd7899f786f9861e02cab17dc238f64d6acb976098ed9809625"}, ] [package.dependencies] @@ -2296,18 +2296,18 @@ files = [ [[package]] name = "duckduckgo-search" -version = "6.2.11" +version = "6.2.12" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." 
optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.2.11-py3-none-any.whl", hash = "sha256:6fb7069b79e8928f487001de6859034ade19201bdcd257ec198802430e374bfe"}, - {file = "duckduckgo_search-6.2.11.tar.gz", hash = "sha256:6b6ef1b552c5e67f23e252025d2504caf6f9fc14f70e86c6dd512200f386c673"}, + {file = "duckduckgo_search-6.2.12-py3-none-any.whl", hash = "sha256:0d379c1f845b632a41553efb13d571788f19ad289229e641a27b5710d92097a6"}, + {file = "duckduckgo_search-6.2.12.tar.gz", hash = "sha256:04f9f1459763668d268344c7a32d943173d0e060dad53a5c2df4b4d3ca9a74cf"}, ] [package.dependencies] click = ">=8.1.7" -primp = ">=0.6.1" +primp = ">=0.6.2" [package.extras] dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] @@ -2429,13 +2429,13 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.114.1" +version = "0.114.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.114.1-py3-none-any.whl", hash = "sha256:5d4746f6e4b7dff0b4f6b6c6d5445645285f662fe75886e99af7ee2d6b58bb3e"}, - {file = "fastapi-0.114.1.tar.gz", hash = "sha256:1d7bbbeabbaae0acb0c22f0ab0b040f642d3093ca3645f8c876b6f91391861d8"}, + {file = "fastapi-0.114.2-py3-none-any.whl", hash = "sha256:44474a22913057b1acb973ab90f4b671ba5200482e7622816d79105dcece1ac5"}, + {file = "fastapi-0.114.2.tar.gz", hash = "sha256:0adb148b62edb09e8c6eeefa3ea934e8f276dabc038c5a82989ea6346050c3da"}, ] [package.dependencies] @@ -3057,20 +3057,20 @@ tests = ["cython", "hypothesis", "mpmath", "pytest", "setuptools"] [[package]] name = "google-ai-generativelanguage" -version = "0.6.1" +version = "0.6.9" description = "Google Ai Generativelanguage API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-ai-generativelanguage-0.6.1.tar.gz", hash = "sha256:4abf37000718b20c43f4b90672b3ab8850738b02457efffd11f3184e03272ed2"}, - {file = "google_ai_generativelanguage-0.6.1-py3-none-any.whl", hash = "sha256:d2afc991c47663bdf65bd4aabcd89723550b81ad0a6d0be8bfb0160755da4cf0"}, + {file = "google_ai_generativelanguage-0.6.9-py3-none-any.whl", hash = "sha256:50360cd80015d1a8cc70952e98560f32fa06ddee2e8e9f4b4b98e431dc561e0b"}, + {file = "google_ai_generativelanguage-0.6.9.tar.gz", hash = "sha256:899f1d3a06efa9739f1cd9d2788070178db33c89d4a76f2e8f4da76f649155fa"}, ] [package.dependencies] google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" [[package]] name = "google-api-core" @@ -3336,16 +3336,16 @@ testing = ["pytest"] [[package]] name = "google-generativeai" -version = "0.5.0" +version = "0.8.1" description = "Google Generative AI High level API client library and tools." 
optional = false python-versions = ">=3.9" files = [ - {file = "google_generativeai-0.5.0-py3-none-any.whl", hash = "sha256:207ed12c6a2eeab549a45abbf5373c82077f62b16030bdb502556c78f6d1b5d2"}, + {file = "google_generativeai-0.8.1-py3-none-any.whl", hash = "sha256:b031877f24d51af0945207657c085896a0a886eceec7a1cb7029327b0aa6e2f6"}, ] [package.dependencies] -google-ai-generativelanguage = "0.6.1" +google-ai-generativelanguage = "0.6.9" google-api-core = "*" google-api-python-client = "*" google-auth = ">=2.15.0" @@ -3990,15 +3990,18 @@ files = [ [[package]] name = "idna" -version = "3.8" +version = "3.9" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" files = [ - {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, - {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, + {file = "idna-3.9-py3-none-any.whl", hash = "sha256:69297d5da0cc9281c77efffb4e730254dd45943f45bbfb461de5991713989b1e"}, + {file = "idna-3.9.tar.gz", hash = "sha256:e5c5dafde284f26e9e0f28f6ea2d6400abd5ca099864a67f576f3981c6476124"}, ] +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + [[package]] name = "importlib-metadata" version = "6.11.0" @@ -4393,13 +4396,13 @@ six = "*" [[package]] name = "langfuse" -version = "2.48.0" +version = "2.48.1" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langfuse-2.48.0-py3-none-any.whl", hash = "sha256:475b047e461f8a45e3c7d81b6a87e0b9e389c489d465b838aa69cbdd16eeacce"}, - {file = "langfuse-2.48.0.tar.gz", hash = "sha256:46e7e6e6e97fe03115a9f95d7f29b3fcd1848a9d1bb34608ebb42a3931919e45"}, + {file = "langfuse-2.48.1-py3-none-any.whl", hash = "sha256:8661070b6d94ba1d7da92c054f3110b6ecf4489d6e8204a4080f934f3f49ebf2"}, + {file = "langfuse-2.48.1.tar.gz", hash = "sha256:b8117d90babec6be1bc3303b42e0b71848531eae44118e6e0123d03e7961d0fc"}, ] [package.dependencies] @@ -4418,13 +4421,13 @@ openai = ["openai (>=0.27.8)"] [[package]] name = "langsmith" -version = "0.1.118" +version = "0.1.120" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.118-py3-none-any.whl", hash = "sha256:f017127b3efb037da5e46ff4f8583e8192e7955191737240c327f3eadc144d7c"}, - {file = "langsmith-0.1.118.tar.gz", hash = "sha256:ff1ca06c92c6081250244ebbce5d0bb347b9d898d2e9b60a13b11f0f0720f09f"}, + {file = "langsmith-0.1.120-py3-none-any.whl", hash = "sha256:54d2785e301646c0988e0a69ebe4d976488c87b41928b358cb153b6ddd8db62b"}, + {file = "langsmith-0.1.120.tar.gz", hash = "sha256:25499ca187b41bd89d784b272b97a8d76f60e0e21bdf20336e8a2aa6a9b23ac9"}, ] [package.dependencies] @@ -6232,13 +6235,13 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.3.2" +version = "4.3.3" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.3.2-py3-none-any.whl", hash = "sha256:eb1c8582560b34ed4ba105009a4badf7f6f85768b30126f351328507b2beb617"}, - {file = "platformdirs-4.3.2.tar.gz", hash = "sha256:9e5e27a08aa095dd127b9f2e764d74254f482fef22b0970773bfba79d091ab8c"}, + {file = "platformdirs-4.3.3-py3-none-any.whl", hash = "sha256:50a5450e2e84f44539718293cbb1da0a0885c9d14adf21b77bae4e66fc99d9b5"}, + {file = "platformdirs-4.3.3.tar.gz", hash = "sha256:d4e0b7d8ec176b341fb03cb11ca12d0276faa8c485f9cd218f613840463fc2c0"}, ] [package.extras] @@ -6248,13 +6251,13 @@ type = ["mypy (>=1.11.2)"] [[package]] name = "plotly" -version = "5.24.0" +version = "5.24.1" description = "An open-source, interactive data visualization library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "plotly-5.24.0-py3-none-any.whl", hash = "sha256:0e54efe52c8cef899f7daa41be9ed97dfb6be622613a2a8f56a86a0634b2b67e"}, - {file = "plotly-5.24.0.tar.gz", hash = "sha256:eae9f4f54448682442c92c1e97148e3ad0c52f0cf86306e1b76daba24add554a"}, + {file = "plotly-5.24.1-py3-none-any.whl", hash = "sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"}, + {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"}, ] [package.dependencies] @@ -6356,19 +6359,19 @@ dill = ["dill (>=0.3.8)"] [[package]] name = "primp" -version = "0.6.1" +version = "0.6.2" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" optional = false python-versions = ">=3.8" files = [ - {file = "primp-0.6.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:60cfe95e0bdf154b0f9036d38acaddc9aef02d6723ed125839b01449672d3946"}, - {file = "primp-0.6.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e1e92433ecf32639f9e800bc3a5d58b03792bdec99421b7fb06500e2fae63c85"}, - {file = "primp-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e02353f13f07fb5a6f91df9e2f4d8ec9f41312de95088744dce1c9729a3865d"}, - {file = "primp-0.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:c5a2ccfdf488b17be225a529a31e2b22724b2e22fba8e1ae168a222f857c2dc0"}, - {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f335c2ace907800a23bbb7bc6e15acc7fff659b86a2d5858817f6ed79cea07cf"}, - {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5dc15bd9d47ded7bc356fcb5d8321972dcbeba18e7d3b7250e12bb7365447b2b"}, - {file = "primp-0.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:eebf0412ebba4089547b16b97b765d83f69f1433d811bb02b02cdcdbca20f672"}, - {file = "primp-0.6.1.tar.gz", hash = "sha256:64b3c12e3d463a887518811c46f3ec37cca02e6af1ddf1287e548342de436301"}, + {file = "primp-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4a35d441462a55d9a9525bf170e2ffd2fcb3db6039b23e802859fa22c18cdd51"}, + {file = "primp-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:f67ccade95bdbca3cf9b96b93aa53f9617d85ddbf988da4e9c523aa785fd2d54"}, + {file = "primp-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8074b93befaf36567e4cf3d4a1a8cd6ab9cc6e4dd4ff710650678daa405aee71"}, + {file = "primp-0.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7d3e2a3f8c6262e9b883651b79c4ff2b7677a76f47293a139f541c9ea333ce3b"}, + {file = "primp-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:a460ea389371c6d04839b4b50b5805d99da8ebe281a2e8b534d27377c6d44f0e"}, + {file = 
"primp-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5b6b27e89d3c05c811aff0e4fde7a36d6957b15b3112f4ce28b6b99e8ca1e725"}, + {file = "primp-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:1006a40a85f88a4c5222094813a1ebc01f85a63e9a33d2c443288c0720bed321"}, + {file = "primp-0.6.2.tar.gz", hash = "sha256:5a96a6b65195a8a989157e67d23bd171c49be238654e02bdf1b1fda36cbcc068"}, ] [package.extras] @@ -7025,15 +7028,18 @@ files = [ [[package]] name = "pyreadline3" -version = "3.4.3" +version = "3.5.2" description = "A python implementation of GNU readline." optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "pyreadline3-3.4.3-py3-none-any.whl", hash = "sha256:f832c5898f4f9a0f81d48a8c499b39d0179de1a465ea3def1a7e7231840b4ed6"}, - {file = "pyreadline3-3.4.3.tar.gz", hash = "sha256:ebab0baca37f50e2faa1dd99a6da1c75de60e0d68a3b229c134bbd12786250e2"}, + {file = "pyreadline3-3.5.2-py3-none-any.whl", hash = "sha256:a87d56791e2965b2b187e2ea33dcf664600842c997c0623c95cf8ef07db83de9"}, + {file = "pyreadline3-3.5.2.tar.gz", hash = "sha256:ba82292e52c5a3bb256b291af0c40b457c1e8699cac9a873abbcaac8aef3a1bb"}, ] +[package.extras] +dev = ["build", "flake8", "pytest", "twine"] + [[package]] name = "pytest" version = "8.3.3" @@ -8778,13 +8784,13 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "tencentcloud-sdk-python-common" -version = "3.0.1230" +version = "3.0.1231" description = "Tencent Cloud Common SDK for Python" optional = false python-versions = "*" files = [ - {file = "tencentcloud-sdk-python-common-3.0.1230.tar.gz", hash = "sha256:1e0f3bab80026fcb0083820869239b3f8cf30beb8e00e12c213bdecc75eb7577"}, - {file = "tencentcloud_sdk_python_common-3.0.1230-py2.py3-none-any.whl", hash = "sha256:03616c79685c154c689536a9c823d52b855cf49eada70679826a92aff5afd596"}, + {file = "tencentcloud-sdk-python-common-3.0.1231.tar.gz", hash = "sha256:22aa281ca2eac511e1615b2953da7c4a0bec87cf93a05a7a15dbb61b23a215ee"}, + {file = "tencentcloud_sdk_python_common-3.0.1231-py2.py3-none-any.whl", hash = "sha256:bd0f7c4df4b156ec35c8731afa1f498043c7e1cd5d2feb595ee441fdb45a061e"}, ] [package.dependencies] @@ -8792,17 +8798,17 @@ requests = ">=2.16.0" [[package]] name = "tencentcloud-sdk-python-hunyuan" -version = "3.0.1230" +version = "3.0.1231" description = "Tencent Cloud Hunyuan SDK for Python" optional = false python-versions = "*" files = [ - {file = "tencentcloud-sdk-python-hunyuan-3.0.1230.tar.gz", hash = "sha256:900d15cb9dc2217b1282d985898ec7ecf97859351c86c6f7efc74685f08a5f85"}, - {file = "tencentcloud_sdk_python_hunyuan-3.0.1230-py2.py3-none-any.whl", hash = "sha256:604dab0d4d66ea942f23d7980c76b5f0f6af3d68a8374e619331a4dd2910991e"}, + {file = "tencentcloud-sdk-python-hunyuan-3.0.1231.tar.gz", hash = "sha256:6da12f418f14305b3a6b7bb29b6d95bf4038a6b66b81c0e03b8dafc4f6df99ca"}, + {file = "tencentcloud_sdk_python_hunyuan-3.0.1231-py2.py3-none-any.whl", hash = "sha256:21ba28f69c34c15e20900be3f2c06376fcaf7e58265f939833c55631f2348792"}, ] [package.dependencies] -tencentcloud-sdk-python-common = "3.0.1230" +tencentcloud-sdk-python-common = "3.0.1231" [[package]] name = "threadpoolctl" @@ -9205,13 +9211,13 @@ typing-extensions = ">=3.7.4.3" [[package]] name = "types-requests" -version = "2.32.0.20240907" +version = "2.32.0.20240914" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" files = [ - {file = "types-requests-2.32.0.20240907.tar.gz", hash = "sha256:ff33935f061b5e81ec87997e91050f7b4af4f82027a7a7a9d9aaea04a963fdf8"}, - {file = 
"types_requests-2.32.0.20240907-py3-none-any.whl", hash = "sha256:1d1e79faeaf9d42def77f3c304893dea17a97cae98168ac69f3cb465516ee8da"}, + {file = "types-requests-2.32.0.20240914.tar.gz", hash = "sha256:2850e178db3919d9bf809e434eef65ba49d0e7e33ac92d588f4a5e295fffd405"}, + {file = "types_requests-2.32.0.20240914-py3-none-any.whl", hash = "sha256:59c2f673eb55f32a99b2894faf6020e1a9f4a402ad0f192bfee0b64469054310"}, ] [package.dependencies] @@ -9454,13 +9460,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.2" +version = "2.2.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, + {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, + {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, ] [package.extras] @@ -9614,12 +9620,12 @@ files = [ [[package]] name = "volcengine-python-sdk" -version = "1.0.100" +version = "1.0.101" description = "Volcengine SDK for Python" optional = false python-versions = "*" files = [ - {file = "volcengine-python-sdk-1.0.100.tar.gz", hash = "sha256:cdc194fe3ce51adda6892d2ca1c43edba3300699321dc6c69119c59fc3b28932"}, + {file = "volcengine-python-sdk-1.0.101.tar.gz", hash = "sha256:1b76e71a6dcf3d5be1b2c058e7d281359e6cca2cc920ffe2567d3115beea1d02"}, ] [package.dependencies] @@ -10008,13 +10014,13 @@ h11 = ">=0.9.0,<1" [[package]] name = "xinference-client" -version = "0.13.3" +version = "0.15.2" description = "Client for Xinference" optional = false python-versions = "*" files = [ - {file = "xinference-client-0.13.3.tar.gz", hash = "sha256:822b722100affdff049c27760be7d62ac92de58c87a40d3361066df446ba648f"}, - {file = "xinference_client-0.13.3-py3-none-any.whl", hash = "sha256:f0eff3858b1ebcef2129726f82b09259c177e11db466a7ca23def3d4849c419f"}, + {file = "xinference-client-0.15.2.tar.gz", hash = "sha256:5c2259bb133148d1cc9bd2b8ec6eb8b5bbeba7f11d6252959f4e6cd79baa53ed"}, + {file = "xinference_client-0.15.2-py3-none-any.whl", hash = "sha256:b6275adab695e75e75a33e21e0ad212488fc2d5a4d0f693d544c0e78469abbe3"}, ] [package.dependencies] @@ -10215,13 +10221,13 @@ requests = "*" [[package]] name = "zipp" -version = "3.20.1" +version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.20.1-py3-none-any.whl", hash = "sha256:9960cd8967c8f85a56f920d5d507274e74f9ff813a0ab8889a5b5be2daf44064"}, - {file = "zipp-3.20.1.tar.gz", hash = "sha256:c22b14cc4763c5a5b04134207736c107db42e9d3ef2d9779d465f5f1bcba572b"}, + {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, + {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, ] [package.extras] @@ -10416,4 +10422,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "9173a56b2efea12804c980511e1465fba43c7a3d83b1ad284ee149851ed67fc5" +content-hash = "eb7ef7be5c7790e214f37f17f92b69407ad557cb80055ef7e49e36eb51b3fca6" diff --git a/api/pyproject.toml b/api/pyproject.toml index 8c10f1dad9..506f379aaf 
100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -100,6 +100,7 @@ exclude = [ [tool.pytest_env] OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii" UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa" +FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa" AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com" AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94" ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz" @@ -155,20 +156,20 @@ flask-restful = "~0.3.10" Flask-SQLAlchemy = "~3.1.1" gevent = "~23.9.1" gmpy2 = "~2.2.1" -google-ai-generativelanguage = "0.6.1" +google-ai-generativelanguage = "0.6.9" google-api-core = "2.18.0" google-api-python-client = "2.90.0" google-auth = "2.29.0" google-auth-httplib2 = "0.2.0" google-cloud-aiplatform = "1.49.0" google-cloud-storage = "2.16.0" -google-generativeai = "0.5.0" +google-generativeai = "0.8.1" googleapis-common-protos = "1.63.0" gunicorn = "~22.0.0" httpx = { version = "~0.27.0", extras = ["socks"] } huggingface-hub = "~0.16.4" jieba = "0.42.1" -langfuse = "^2.36.1" +langfuse = "^2.48.0" langsmith = "^0.1.77" mailchimp-transactional = "~1.0.50" markdown = "~3.5.1" @@ -203,7 +204,7 @@ transformers = "~4.35.0" unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] } websocket-client = "~1.7.0" werkzeug = "~3.0.1" -xinference-client = "0.13.3" +xinference-client = "0.15.2" yarl = "~1.9.4" zhipuai = "1.0.7" # Before adding new dependency, consider place it in alphabet order (a-z) and suitable group. diff --git a/api/services/message_service.py b/api/services/message_service.py index ecb121c36e..f432a77c80 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -34,6 +34,7 @@ class MessageService: conversation_id: str, first_id: Optional[str], limit: int, + order: str = "asc", ) -> InfiniteScrollPagination: if not user: return InfiniteScrollPagination(data=[], limit=limit, has_more=False) @@ -91,7 +92,8 @@ class MessageService: if rest_count > 0: has_more = True - history_messages = list(reversed(history_messages)) + if order == "asc": + history_messages = list(reversed(history_messages)) return InfiniteScrollPagination(data=history_messages, limit=limit, has_more=has_more) diff --git a/api/tests/integration_tests/model_runtime/__mock/xinference.py b/api/tests/integration_tests/model_runtime/__mock/xinference.py index 299523f4f5..8deb50635f 100644 --- a/api/tests/integration_tests/model_runtime/__mock/xinference.py +++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py @@ -9,7 +9,6 @@ from requests.exceptions import ConnectionError from requests.sessions import Session from xinference_client.client.restful.restful_client import ( Client, - RESTfulChatglmCppChatModelHandle, RESTfulChatModelHandle, RESTfulEmbeddingModelHandle, RESTfulGenerateModelHandle, @@ -19,9 +18,7 @@ from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage class MockXinferenceClass: - def get_chat_model( - self: Client, model_uid: str - ) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]: + def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]: if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url): raise RuntimeError("404 Not Found") diff --git a/api/tests/integration_tests/model_runtime/fireworks/__init__.py b/api/tests/integration_tests/model_runtime/fireworks/__init__.py new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_llm.py b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py new file mode 100644 index 0000000000..699ca293a2 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py @@ -0,0 +1,186 @@ +import os +from collections.abc import Generator + +import pytest + +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageTool, + SystemPromptMessage, + UserPromptMessage, +) +from core.model_runtime.entities.model_entities import AIModelEntity +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.fireworks.llm.llm import FireworksLargeLanguageModel + +"""FOR MOCK FIXTURES, DO NOT REMOVE""" +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +def test_predefined_models(): + model = FireworksLargeLanguageModel() + model_schemas = model.predefined_models() + + assert len(model_schemas) >= 1 + assert isinstance(model_schemas[0], AIModelEntity) + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_validate_credentials_for_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + with pytest.raises(CredentialsValidateFailedError): + # model name to gpt-3.5-turbo because of mocking + model.validate_credentials(model="gpt-3.5-turbo", credentials={"fireworks_api_key": "invalid_key"}) + + model.validate_credentials( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + ) + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + model_parameters={ + "temperature": 0.0, + "top_p": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "max_tokens": 10, + }, + stop=["How"], + stream=False, + user="foo", + ) + + assert isinstance(result, LLMResult) + assert len(result.message.content) > 0 + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_chat_model_with_tools(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage( + content="what's the weather today in London?", + ), + ], + model_parameters={"temperature": 0.0, "max_tokens": 100}, + tools=[ + PromptMessageTool( + name="get_weather", + description="Determine weather in my location", + parameters={ + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state e.g. 
San Francisco, CA"}, + "unit": {"type": "string", "enum": ["c", "f"]}, + }, + "required": ["location"], + }, + ), + PromptMessageTool( + name="get_stock_price", + description="Get the current stock price", + parameters={ + "type": "object", + "properties": {"symbol": {"type": "string", "description": "The stock symbol"}}, + "required": ["symbol"], + }, + ), + ], + stream=False, + user="foo", + ) + + assert isinstance(result, LLMResult) + assert isinstance(result.message, AssistantPromptMessage) + assert len(result.message.tool_calls) > 0 + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_stream_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + model_parameters={"temperature": 0.0, "max_tokens": 100}, + stream=True, + user="foo", + ) + + assert isinstance(result, Generator) + + for chunk in result: + assert isinstance(chunk, LLMResultChunk) + assert isinstance(chunk.delta, LLMResultChunkDelta) + assert isinstance(chunk.delta.message, AssistantPromptMessage) + assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True + if chunk.delta.finish_reason is not None: + assert chunk.delta.usage is not None + assert chunk.delta.usage.completion_tokens > 0 + + +def test_get_num_tokens(): + model = FireworksLargeLanguageModel() + + num_tokens = model.get_num_tokens( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[UserPromptMessage(content="Hello World!")], + ) + + assert num_tokens == 10 + + num_tokens = model.get_num_tokens( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + tools=[ + PromptMessageTool( + name="get_weather", + description="Determine weather in my location", + parameters={ + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state e.g. 
San Francisco, CA"}, + "unit": {"type": "string", "enum": ["c", "f"]}, + }, + "required": ["location"], + }, + ), + ], + ) + + assert num_tokens == 77 diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_provider.py b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py new file mode 100644 index 0000000000..a68cf1a1a8 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py @@ -0,0 +1,17 @@ +import os + +import pytest + +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.fireworks.fireworks import FireworksProvider +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_validate_provider_credentials(setup_openai_mock): + provider = FireworksProvider() + + with pytest.raises(CredentialsValidateFailedError): + provider.validate_provider_credentials(credentials={}) + + provider.validate_provider_credentials(credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}) diff --git a/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py new file mode 100644 index 0000000000..ba3c1eb5e0 --- /dev/null +++ b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py @@ -0,0 +1,91 @@ +from uuid import uuid4 + +from constants import UUID_NIL +from core.prompt.utils.extract_thread_messages import extract_thread_messages + + +class TestMessage: + def __init__(self, id, parent_message_id): + self.id = id + self.parent_message_id = parent_message_id + + def __getitem__(self, item): + return getattr(self, item) + + +def test_extract_thread_messages_single_message(): + messages = [TestMessage(str(uuid4()), UUID_NIL)] + result = extract_thread_messages(messages) + assert len(result) == 1 + assert result[0] == messages[0] + + +def test_extract_thread_messages_linear_thread(): + id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id5, id4), + TestMessage(id4, id3), + TestMessage(id3, id2), + TestMessage(id2, id1), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 5 + assert [msg["id"] for msg in result] == [id5, id4, id3, id2, id1] + + +def test_extract_thread_messages_branched_thread(): + id1, id2, id3, id4 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id4, id2), + TestMessage(id3, id2), + TestMessage(id2, id1), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id4, id2, id1] + + +def test_extract_thread_messages_empty_list(): + messages = [] + result = extract_thread_messages(messages) + assert len(result) == 0 + + +def test_extract_thread_messages_partially_loaded(): + id0, id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id3, id2), + TestMessage(id2, id1), + TestMessage(id1, id0), + ] + result = extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id3, id2, id1] + + +def test_extract_thread_messages_legacy_messages(): + id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id3, UUID_NIL), + TestMessage(id2, UUID_NIL), + TestMessage(id1, UUID_NIL), + ] + result = 
extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id3, id2, id1] + + +def test_extract_thread_messages_mixed_with_legacy_messages(): + id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id5, id4), + TestMessage(id4, id2), + TestMessage(id3, id2), + TestMessage(id2, UUID_NIL), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 4 + assert [msg["id"] for msg in result] == [id5, id4, id2, id1] diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh index aba13292ab..4c1c6bf4f3 100755 --- a/dev/pytest/pytest_model_runtime.sh +++ b/dev/pytest/pytest_model_runtime.sh @@ -6,5 +6,5 @@ pytest api/tests/integration_tests/model_runtime/anthropic \ api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \ api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \ api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \ - api/tests/integration_tests/model_runtime/upstage - + api/tests/integration_tests/model_runtime/upstage \ + api/tests/integration_tests/model_runtime/fireworks diff --git a/docker-legacy/docker-compose.middleware.yaml b/docker-legacy/docker-compose.middleware.yaml index fadbb3e608..da54fe33fd 100644 --- a/docker-legacy/docker-compose.middleware.yaml +++ b/docker-legacy/docker-compose.middleware.yaml @@ -73,7 +73,7 @@ services: # ssrf_proxy server # for more information, please refer to - # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed + # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed ssrf_proxy: image: ubuntu/squid:latest restart: always diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml index f8c5700cd9..1636bb6a21 100644 --- a/docker-legacy/docker-compose.yaml +++ b/docker-legacy/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3' services: # API service api: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: # Startup mode, 'api' starts the API server. @@ -227,7 +227,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: CONSOLE_WEB_URL: '' @@ -396,7 +396,7 @@ services: # Frontend web application. 
   web:
-    image: langgenius/dify-web:0.8.2
+    image: langgenius/dify-web:0.8.3
     restart: always
     environment:
       # The base URL of console application api server, refers to the Console base URL of WEB service if console domain is
@@ -500,7 +500,7 @@ services:
   # ssrf_proxy server
   # for more information, please refer to
-  # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed
+  # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
   ssrf_proxy:
     image: ubuntu/squid:latest
     restart: always
diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml
index 251c62fee1..d7900def73 100644
--- a/docker/docker-compose.middleware.yaml
+++ b/docker/docker-compose.middleware.yaml
@@ -63,7 +63,7 @@ services:
   # ssrf_proxy server
   # for more information, please refer to
-  # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed
+  # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
   ssrf_proxy:
     image: ubuntu/squid:latest
     restart: always
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 0fbc695177..e72c3724f9 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -208,7 +208,7 @@ x-shared-env: &shared-api-worker-env
 services:
   # API service
   api:
-    image: langgenius/dify-api:0.8.2
+    image: langgenius/dify-api:0.8.3
     restart: always
     environment:
       # Use the shared environment variables.
@@ -228,7 +228,7 @@ services:
   # worker service
   # The Celery worker for processing the queue.
   worker:
-    image: langgenius/dify-api:0.8.2
+    image: langgenius/dify-api:0.8.3
     restart: always
     environment:
       # Use the shared environment variables.
@@ -247,7 +247,7 @@ services:
   # Frontend web application.
   web:
-    image: langgenius/dify-web:0.8.2
+    image: langgenius/dify-web:0.8.3
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}
diff --git a/web/.husky/pre-commit b/web/.husky/pre-commit
index 6df8b24b61..d9290e1853 100755
--- a/web/.husky/pre-commit
+++ b/web/.husky/pre-commit
@@ -51,5 +51,32 @@ if $web_modified; then
     echo "Running ESLint on web module"
     cd ./web || exit 1
     npx lint-staged
+
+    echo "Running unit tests check"
+    modified_files=$(git diff --cached --name-only -- utils | grep -v '\.spec\.ts$' || true)
+
+    if [ -n "$modified_files" ]; then
+        for file in $modified_files; do
+            test_file="${file%.*}.spec.ts"
+            echo "Checking for test file: $test_file"
+
+            # check if the test file exists
+            if [ -f "../$test_file" ]; then
+                echo "Detected changes in $file, running corresponding unit tests..."
+                npm run test "../$test_file"
+
+                if [ $? -ne 0 ]; then
+                    echo "Unit tests failed. Please fix the errors before committing."
+                    exit 1
+                fi
+                echo "Unit tests for $file passed."
+            else
+                echo "Warning: $file does not have a corresponding test file."
+            fi
+
+        done
+        echo "All unit tests for modified web/utils files have passed."
+    fi
+
     cd ../
 fi
diff --git a/web/README.md b/web/README.md
index 867d822e27..a84ef21007 100644
--- a/web/README.md
+++ b/web/README.md
@@ -18,6 +18,10 @@ yarn install --frozen-lockfile
 Then, configure the environment variables. Create a file named `.env.local` in the current directory and copy the contents from `.env.example`.
 Modify the values of these environment variables according to your requirements:
+```bash
+cp .env.example .env.local
+```
+
 ```
 # For production release, change this to PRODUCTION
 NEXT_PUBLIC_DEPLOY_ENV=DEVELOPMENT
@@ -78,7 +82,7 @@ If your IDE is VSCode, rename `web/.vscode/settings.example.json` to `web/.vscod
 We start to use [Jest](https://jestjs.io/) and [React Testing Library](https://testing-library.com/docs/react-testing-library/intro/) for Unit Testing.
-You can create a test file with a suffix of `.spec` beside the file that to be tested. For example, if you want to test a file named `util.ts`. The test file name should be `util.spec.ts`.
+You can create a test file with a `.spec` suffix next to the file to be tested. For example, to test a file named `util.ts`, name the test file `util.spec.ts`.
 Run test:
diff --git a/web/app/activate/activateForm.tsx b/web/app/activate/activateForm.tsx
index 3b1eed6f09..8e9691b354 100644
--- a/web/app/activate/activateForm.tsx
+++ b/web/app/activate/activateForm.tsx
@@ -143,6 +143,7 @@ const ActivateForm = () => {
                 onChange={e => setName(e.target.value)}
                 placeholder={t('login.namePlaceholder') || ''}
                 className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'}
+                tabIndex={1}
               />
@@ -159,6 +160,7 @@ const ActivateForm = () => {
                 onChange={e => setPassword(e.target.value)}
                 placeholder={t('login.passwordPlaceholder') || ''}
                 className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'}
+                tabIndex={2}
               />
{t('login.error.passwordInvalid')}
diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 65858ce8cf..e538c347d9 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -263,7 +263,7 @@ const SettingsModal: FC = ({
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx index 80dfb5c534..1c70f4fe77 100644 --- a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx +++ b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx @@ -46,6 +46,7 @@ const ChatItem: FC = ({ const config = useConfigFromDebugContext() const { chatList, + chatListRef, isResponding, handleSend, suggestedQuestions, @@ -80,6 +81,7 @@ const ChatItem: FC = ({ query: message, inputs, model_config: configData, + parent_message_id: chatListRef.current.at(-1)?.id || null, } if (visionConfig.enabled && files?.length && supportVision) @@ -93,7 +95,7 @@ const ChatItem: FC = ({ onGetSuggestedQuestions: (responseItemId, getAbortController) => fetchSuggestedQuestions(appId, responseItemId, getAbortController), }, ) - }, [appId, config, handleSend, inputs, modelAndParameter, textGenerationModelList, visionConfig.enabled]) + }, [appId, config, handleSend, inputs, modelAndParameter, textGenerationModelList, visionConfig.enabled, chatListRef]) const { eventEmitter } = useEventEmitterContextContext() eventEmitter?.useSubscription((v: any) => { diff --git a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx index d93ad00659..5faef46d98 100644 --- a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx +++ b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx @@ -12,7 +12,7 @@ import { import Chat from '@/app/components/base/chat/chat' import { useChat } from '@/app/components/base/chat/chat/hooks' import { useDebugConfigurationContext } from '@/context/debug-configuration' -import type { OnSend } from '@/app/components/base/chat/types' +import type { ChatItem, OnSend } from '@/app/components/base/chat/types' import { useProviderContext } from '@/context/provider-context' import { fetchConversationMessages, @@ -45,10 +45,12 @@ const DebugWithSingleModel = forwardRef { + const doSend: OnSend = useCallback((message, files, last_answer) => { if (checkCanSend && !checkCanSend()) return const currentProvider = textGenerationModelList.find(item => item.provider === modelConfig.provider) @@ -81,10 +83,17 @@ const DebugWithSingleModel = forwardRef fetchSuggestedQuestions(appId, responseItemId, getAbortController), }, ) - }, [appId, checkCanSend, completionParams, config, handleSend, inputs, modelConfig, textGenerationModelList, visionConfig.enabled]) + }, [chatListRef, appId, checkCanSend, completionParams, config, handleSend, inputs, modelConfig, textGenerationModelList, visionConfig.enabled]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? 
undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) const allToolIcons = useMemo(() => { const icons: Record = {} @@ -123,6 +148,7 @@ const DebugWithSingleModel = forwardRef} diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index caec10c4f7..149e877fa4 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -16,6 +16,7 @@ import timezone from 'dayjs/plugin/timezone' import { createContext, useContext } from 'use-context-selector' import { useShallow } from 'zustand/react/shallow' import { useTranslation } from 'react-i18next' +import { UUID_NIL } from '../../base/chat/constants' import s from './style.module.css' import VarPanel from './var-panel' import cn from '@/utils/classnames' @@ -81,72 +82,92 @@ const PARAM_MAP = { frequency_penalty: 'Frequency Penalty', } -// Format interface data for easy display +function appendQAToChatList(newChatList: IChatItem[], item: any, conversationId: string, timezone: string, format: string) { + newChatList.push({ + id: item.id, + content: item.answer, + agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), + feedback: item.feedbacks.find((item: any) => item.from_source === 'user'), // user feedback + adminFeedback: item.feedbacks.find((item: any) => item.from_source === 'admin'), // admin feedback + feedbackDisabled: false, + isAnswer: true, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + log: [ + ...item.message, + ...(item.message[item.message.length - 1]?.role !== 'assistant' + ? [ + { + role: 'assistant', + text: item.answer, + files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + }, + ] + : []), + ], + workflow_run_id: item.workflow_run_id, + conversationId, + input: { + inputs: item.inputs, + query: item.query, + }, + more: { + time: dayjs.unix(item.created_at).tz(timezone).format(format), + tokens: item.answer_tokens + item.message_tokens, + latency: item.provider_response_latency.toFixed(2), + }, + citation: item.metadata?.retriever_resources, + annotation: (() => { + if (item.annotation_hit_history) { + return { + id: item.annotation_hit_history.annotation_id, + authorName: item.annotation_hit_history.annotation_create_account?.name || 'N/A', + created_at: item.annotation_hit_history.created_at, + } + } + + if (item.annotation) { + return { + id: item.annotation.id, + authorName: item.annotation.account.name, + logAnnotation: item.annotation, + created_at: 0, + } + } + + return undefined + })(), + parentMessageId: `question-${item.id}`, + }) + newChatList.push({ + id: `question-${item.id}`, + content: item.inputs.query || item.inputs.default_input || item.query, // text generation: item.inputs.query; chat: item.query + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + parentMessageId: item.parent_message_id || undefined, + }) +} + const getFormattedChatList = (messages: ChatMessage[], conversationId: string, timezone: string, format: string) => { const newChatList: IChatItem[] = [] - messages.forEach((item: ChatMessage) => { - newChatList.push({ - id: `question-${item.id}`, - content: item.inputs.query || item.inputs.default_input || item.query, // text generation: item.inputs.query; chat: item.query - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - 
newChatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedbacks.find(item => item.from_source === 'user'), // user feedback - adminFeedback: item.feedbacks.find(item => item.from_source === 'admin'), // admin feedback - feedbackDisabled: false, - isAnswer: true, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - log: [ - ...item.message, - ...(item.message[item.message.length - 1]?.role !== 'assistant' - ? [ - { - role: 'assistant', - text: item.answer, - files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }, - ] - : []), - ], - workflow_run_id: item.workflow_run_id, - conversationId, - input: { - inputs: item.inputs, - query: item.query, - }, - more: { - time: dayjs.unix(item.created_at).tz(timezone).format(format), - tokens: item.answer_tokens + item.message_tokens, - latency: item.provider_response_latency.toFixed(2), - }, - citation: item.metadata?.retriever_resources, - annotation: (() => { - if (item.annotation_hit_history) { - return { - id: item.annotation_hit_history.annotation_id, - authorName: item.annotation_hit_history.annotation_create_account?.name || 'N/A', - created_at: item.annotation_hit_history.created_at, - } - } + let nextMessageId = null + for (const item of messages) { + if (!item.parent_message_id) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + break + } - if (item.annotation) { - return { - id: item.annotation.id, - authorName: item.annotation.account.name, - logAnnotation: item.annotation, - created_at: 0, - } - } - - return undefined - })(), - }) - }) - return newChatList + if (!nextMessageId) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + nextMessageId = item.parent_message_id + } + } + } + return newChatList.reverse() } // const displayedParams = CompletionParams.slice(0, -2) @@ -171,6 +192,7 @@ function DetailPanel([]) + const fetchedMessages = useRef([]) const [hasMore, setHasMore] = useState(true) const [varValues, setVarValues] = useState>({}) const fetchData = async () => { @@ -192,7 +214,8 @@ function DetailPanel - : items.length < 8 + : (items.length < 8 && !hasMore) ?
{ }, [appParams, currentConversationItem?.introduction, currentConversationId]) const { chatList, + chatListRef, + handleUpdateChatList, handleSend, handleStop, isResponding, @@ -63,11 +66,18 @@ const ChatWrapper = () => { currentChatInstanceRef.current.handleStop = handleStop }, []) - const doSend: OnSend = useCallback((message, files) => { + const doSend: OnSend = useCallback((message, files, last_answer) => { + const lastAnswer = chatListRef.current.at(-1) + const data: any = { query: message, inputs: currentConversationId ? currentConversationItem?.inputs : newConversationInputs, conversation_id: currentConversationId, + parent_message_id: last_answer?.id || (lastAnswer + ? lastAnswer.isOpeningStatement + ? null + : lastAnswer.id + : null), } if (appConfig?.file_upload?.image.enabled && files?.length) @@ -83,6 +93,7 @@ const ChatWrapper = () => { }, ) }, [ + chatListRef, appConfig, currentConversationId, currentConversationItem, @@ -92,6 +103,23 @@ const ChatWrapper = () => { isInstalledApp, appId, ]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) + const chatNode = useMemo(() => { if (inputsForms.length) { return ( @@ -148,6 +176,7 @@ const ChatWrapper = () => { chatFooterClassName='pb-4' chatFooterInnerClassName={`mx-auto w-full max-w-full ${isMobile && 'px-4'}`} onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={chatNode} allToolIcons={appMeta?.tool_icons || {}} diff --git a/web/app/components/base/chat/chat-with-history/hooks.tsx b/web/app/components/base/chat/chat-with-history/hooks.tsx index 1e05cc39ef..b9ebc42ec8 100644 --- a/web/app/components/base/chat/chat-with-history/hooks.tsx +++ b/web/app/components/base/chat/chat-with-history/hooks.tsx @@ -12,10 +12,10 @@ import produce from 'immer' import type { Callback, ChatConfig, - ChatItem, Feedback, } from '../types' import { CONVERSATION_ID_INFO } from '../constants' +import { getPrevChatList } from '../utils' import { delConversation, fetchAppInfo, @@ -34,7 +34,6 @@ import type { AppData, ConversationItem, } from '@/models/share' -import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils' import { useToastContext } from '@/app/components/base/toast' import { changeLanguage } from '@/i18n/i18next-config' import { useAppFavicon } from '@/hooks/use-app-favicon' @@ -108,32 +107,12 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { const { data: appConversationData, isLoading: appConversationDataLoading, mutate: mutateAppConversationData } = useSWR(['appConversationData', isInstalledApp, appId, false], () => fetchConversations(isInstalledApp, appId, undefined, false, 100)) const { data: appChatListData, isLoading: appChatListDataLoading } = useSWR(chatShouldReloadKey ? 
['appChatList', chatShouldReloadKey, isInstalledApp, appId] : null, () => fetchChatList(chatShouldReloadKey, isInstalledApp, appId)) - const appPrevChatList = useMemo(() => { - const data = appChatListData?.data || [] - const chatList: ChatItem[] = [] - - if (currentConversationId && data.length) { - data.forEach((item: any) => { - chatList.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - chatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedback, - isAnswer: true, - citation: item.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }) - }) - } - - return chatList - }, [appChatListData, currentConversationId]) + const appPrevChatList = useMemo( + () => (currentConversationId && appChatListData?.data.length) + ? getPrevChatList(appChatListData.data) + : [], + [appChatListData, currentConversationId], + ) const [showNewConversationItemInList, setShowNewConversationItemInList] = useState(false) diff --git a/web/app/components/base/chat/chat/answer/index.tsx b/web/app/components/base/chat/chat/answer/index.tsx index 5fe2a7bad5..705cd73ddf 100644 --- a/web/app/components/base/chat/chat/answer/index.tsx +++ b/web/app/components/base/chat/chat/answer/index.tsx @@ -35,6 +35,7 @@ type AnswerProps = { chatAnswerContainerInner?: string hideProcessDetail?: boolean appData?: AppData + noChatInput?: boolean } const Answer: FC = ({ item, @@ -48,6 +49,7 @@ const Answer: FC = ({ chatAnswerContainerInner, hideProcessDetail, appData, + noChatInput, }) => { const { t } = useTranslation() const { @@ -110,6 +112,7 @@ const Answer: FC = ({ question={question} index={index} showPromptLog={showPromptLog} + noChatInput={noChatInput} /> ) } diff --git a/web/app/components/base/chat/chat/answer/operation.tsx b/web/app/components/base/chat/chat/answer/operation.tsx index 08267bb09c..5e5fc3b204 100644 --- a/web/app/components/base/chat/chat/answer/operation.tsx +++ b/web/app/components/base/chat/chat/answer/operation.tsx @@ -7,6 +7,7 @@ import { import { useTranslation } from 'react-i18next' import type { ChatItem } from '../../types' import { useChatContext } from '../context' +import RegenerateBtn from '@/app/components/base/regenerate-btn' import cn from '@/utils/classnames' import CopyBtn from '@/app/components/base/copy-btn' import { MessageFast } from '@/app/components/base/icons/src/vender/solid/communication' @@ -28,6 +29,7 @@ type OperationProps = { maxSize: number contentWidth: number hasWorkflowProcess: boolean + noChatInput?: boolean } const Operation: FC = ({ item, @@ -37,6 +39,7 @@ const Operation: FC = ({ maxSize, contentWidth, hasWorkflowProcess, + noChatInput, }) => { const { t } = useTranslation() const { @@ -45,6 +48,7 @@ const Operation: FC = ({ onAnnotationEdited, onAnnotationRemoved, onFeedback, + onRegenerate, } = useChatContext() const [isShowReplyModal, setIsShowReplyModal] = useState(false) const { @@ -159,12 +163,13 @@ const Operation: FC = ({
) } + { + !isOpeningStatement && !noChatInput && onRegenerate?.(item)} /> + } { config?.supportFeedback && !localFeedback?.rating && onFeedback && !isOpeningStatement && ( -
- +
+
handleFeedback('like')} diff --git a/web/app/components/base/chat/chat/context.tsx b/web/app/components/base/chat/chat/context.tsx index ba6f67189e..c47b750176 100644 --- a/web/app/components/base/chat/chat/context.tsx +++ b/web/app/components/base/chat/chat/context.tsx @@ -12,6 +12,7 @@ export type ChatContextValue = Pick void noChatInput?: boolean onSend?: OnSend + onRegenerate?: OnRegenerate chatContainerClassName?: string chatContainerInnerClassName?: string chatFooterClassName?: string @@ -67,6 +69,7 @@ const Chat: FC = ({ appData, config, onSend, + onRegenerate, chatList, isResponding, noStopResponding, @@ -186,6 +189,7 @@ const Chat: FC = ({ answerIcon={answerIcon} allToolIcons={allToolIcons} onSend={onSend} + onRegenerate={onRegenerate} onAnnotationAdded={onAnnotationAdded} onAnnotationEdited={onAnnotationEdited} onAnnotationRemoved={onAnnotationRemoved} @@ -219,6 +223,7 @@ const Chat: FC = ({ showPromptLog={showPromptLog} chatAnswerContainerInner={chatAnswerContainerInner} hideProcessDetail={hideProcessDetail} + noChatInput={noChatInput} /> ) } diff --git a/web/app/components/base/chat/chat/type.ts b/web/app/components/base/chat/chat/type.ts index b2cb18011c..dd26a4179d 100644 --- a/web/app/components/base/chat/chat/type.ts +++ b/web/app/components/base/chat/chat/type.ts @@ -95,6 +95,7 @@ export type IChatItem = { // for agent log conversationId?: string input?: any + parentMessageId?: string } export type Metadata = { diff --git a/web/app/components/base/chat/constants.ts b/web/app/components/base/chat/constants.ts index 8249be7375..309f0f04a7 100644 --- a/web/app/components/base/chat/constants.ts +++ b/web/app/components/base/chat/constants.ts @@ -1 +1,2 @@ export const CONVERSATION_ID_INFO = 'conversationIdInfo' +export const UUID_NIL = '00000000-0000-0000-0000-000000000000' diff --git a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx index 48ee411058..ed2f24274d 100644 --- a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx +++ b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx @@ -2,6 +2,7 @@ import { useCallback, useEffect, useMemo } from 'react' import Chat from '../chat' import type { ChatConfig, + ChatItem, OnSend, } from '../types' import { useChat } from '../chat/hooks' @@ -45,11 +46,13 @@ const ChatWrapper = () => { } as ChatConfig }, [appParams, currentConversationItem?.introduction, currentConversationId]) const { + chatListRef, chatList, handleSend, handleStop, isResponding, suggestedQuestions, + handleUpdateChatList, } = useChat( appConfig, { @@ -65,11 +68,18 @@ const ChatWrapper = () => { currentChatInstanceRef.current.handleStop = handleStop }, []) - const doSend: OnSend = useCallback((message, files) => { + const doSend: OnSend = useCallback((message, files, last_answer) => { + const lastAnswer = chatListRef.current.at(-1) + const data: any = { query: message, inputs: currentConversationId ? currentConversationItem?.inputs : newConversationInputs, conversation_id: currentConversationId, + parent_message_id: last_answer?.id || (lastAnswer + ? lastAnswer.isOpeningStatement + ? 
null + : lastAnswer.id + : null), } if (appConfig?.file_upload?.image.enabled && files?.length) @@ -85,6 +95,7 @@ const ChatWrapper = () => { }, ) }, [ + chatListRef, appConfig, currentConversationId, currentConversationItem, @@ -94,6 +105,23 @@ const ChatWrapper = () => { isInstalledApp, appId, ]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) + const chatNode = useMemo(() => { if (inputsForms.length) { return ( @@ -136,6 +164,7 @@ const ChatWrapper = () => { chatFooterClassName='pb-4' chatFooterInnerClassName={cn('mx-auto w-full max-w-full tablet:px-4', isMobile && 'px-4')} onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={chatNode} allToolIcons={appMeta?.tool_icons || {}} diff --git a/web/app/components/base/chat/embedded-chatbot/hooks.tsx b/web/app/components/base/chat/embedded-chatbot/hooks.tsx index 39d25f57d1..fd89efcbff 100644 --- a/web/app/components/base/chat/embedded-chatbot/hooks.tsx +++ b/web/app/components/base/chat/embedded-chatbot/hooks.tsx @@ -11,10 +11,10 @@ import { useLocalStorageState } from 'ahooks' import produce from 'immer' import type { ChatConfig, - ChatItem, Feedback, } from '../types' import { CONVERSATION_ID_INFO } from '../constants' +import { getPrevChatList, getProcessedInputsFromUrlParams } from '../utils' import { fetchAppInfo, fetchAppMeta, @@ -28,10 +28,8 @@ import type { // AppData, ConversationItem, } from '@/models/share' -import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils' import { useToastContext } from '@/app/components/base/toast' import { changeLanguage } from '@/i18n/i18next-config' -import { getProcessedInputsFromUrlParams } from '@/app/components/base/chat/utils' export const useEmbeddedChatbot = () => { const isInstalledApp = false @@ -75,32 +73,12 @@ export const useEmbeddedChatbot = () => { const { data: appConversationData, isLoading: appConversationDataLoading, mutate: mutateAppConversationData } = useSWR(['appConversationData', isInstalledApp, appId, false], () => fetchConversations(isInstalledApp, appId, undefined, false, 100)) const { data: appChatListData, isLoading: appChatListDataLoading } = useSWR(chatShouldReloadKey ? ['appChatList', chatShouldReloadKey, isInstalledApp, appId] : null, () => fetchChatList(chatShouldReloadKey, isInstalledApp, appId)) - const appPrevChatList = useMemo(() => { - const data = appChatListData?.data || [] - const chatList: ChatItem[] = [] - - if (currentConversationId && data.length) { - data.forEach((item: any) => { - chatList.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - chatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? 
sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedback, - isAnswer: true, - citation: item.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }) - }) - } - - return chatList - }, [appChatListData, currentConversationId]) + const appPrevChatList = useMemo( + () => (currentConversationId && appChatListData?.data.length) + ? getPrevChatList(appChatListData.data) + : [], + [appChatListData, currentConversationId], + ) const [showNewConversationItemInList, setShowNewConversationItemInList] = useState(false) @@ -155,7 +133,7 @@ export const useEmbeddedChatbot = () => { type: 'text-input', } }) - }, [appParams]) + }, [initInputs, appParams]) useEffect(() => { // init inputs from url params diff --git a/web/app/components/base/chat/types.ts b/web/app/components/base/chat/types.ts index 21277fec57..489dbb44cf 100644 --- a/web/app/components/base/chat/types.ts +++ b/web/app/components/base/chat/types.ts @@ -63,7 +63,9 @@ export type ChatItem = IChatItem & { conversationId?: string } -export type OnSend = (message: string, files?: VisionFile[]) => void +export type OnSend = (message: string, files?: VisionFile[], last_answer?: ChatItem) => void + +export type OnRegenerate = (chatItem: ChatItem) => void export type Callback = { onSuccess: () => void diff --git a/web/app/components/base/chat/utils.ts b/web/app/components/base/chat/utils.ts index 3fe5050cc7..e851c4c463 100644 --- a/web/app/components/base/chat/utils.ts +++ b/web/app/components/base/chat/utils.ts @@ -1,7 +1,11 @@ +import { addFileInfos, sortAgentSorts } from '../../tools/utils' +import { UUID_NIL } from './constants' +import type { ChatItem } from './types' + async function decodeBase64AndDecompress(base64String: string) { const binaryString = atob(base64String) const compressedUint8Array = Uint8Array.from(binaryString, char => char.charCodeAt(0)) - const decompressedStream = new Response(compressedUint8Array).body.pipeThrough(new DecompressionStream('gzip')) + const decompressedStream = new Response(compressedUint8Array).body?.pipeThrough(new DecompressionStream('gzip')) const decompressedArrayBuffer = await new Response(decompressedStream).arrayBuffer() return new TextDecoder().decode(decompressedArrayBuffer) } @@ -15,6 +19,57 @@ function getProcessedInputsFromUrlParams(): Record { return inputs } +function appendQAToChatList(chatList: ChatItem[], item: any) { + // we append answer first and then question since will reverse the whole chatList later + chatList.push({ + id: item.id, + content: item.answer, + agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), + feedback: item.feedback, + isAnswer: true, + citation: item.retriever_resources, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + }) + chatList.push({ + id: `question-${item.id}`, + content: item.query, + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + }) +} + +/** + * Computes the latest thread messages from all messages of the conversation. + * Same logic as backend codebase `api/core/prompt/utils/extract_thread_messages.py` + * + * @param fetchedMessages - The history chat list data from the backend, sorted by created_at in descending order. This includes all flattened history messages of the conversation. 
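+ *
+ * Illustrative walk-through (hypothetical ids; field names as in the messages above):
+ *   fetched newest-first:
+ *     { id: 'a4', parent_message_id: 'a2' }   kept (thread head), pending parent -> 'a2'
+ *     { id: 'a3', parent_message_id: 'a2' }   skipped: sibling left over from an earlier regenerate
+ *     { id: 'a2', parent_message_id: 'a1' }   kept, id matches the pending parent, pending parent -> 'a1'
+ *     { id: 'a1', parent_message_id: null }   kept as the root, loop stops
+ *   after reverse(): Q/A pairs for a1, a2, a4, i.e. only the active thread.
+ *   A parent_message_id of UUID_NIL appears on messages created before threading
+ *   was tracked; once the walk reaches one, the older messages are kept as well.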
+ * @returns An array of ChatItems representing the latest thread. + */ +function getPrevChatList(fetchedMessages: any[]) { + const ret: ChatItem[] = [] + let nextMessageId = null + + for (const item of fetchedMessages) { + if (!item.parent_message_id) { + appendQAToChatList(ret, item) + break + } + + if (!nextMessageId) { + appendQAToChatList(ret, item) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(ret, item) + nextMessageId = item.parent_message_id + } + } + } + return ret.reverse() +} + export { getProcessedInputsFromUrlParams, + getPrevChatList, } diff --git a/web/app/components/base/icons/assets/vender/line/general/refresh.svg b/web/app/components/base/icons/assets/vender/line/general/refresh.svg new file mode 100644 index 0000000000..05cf986827 --- /dev/null +++ b/web/app/components/base/icons/assets/vender/line/general/refresh.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/line/general/Refresh.json b/web/app/components/base/icons/src/vender/line/general/Refresh.json new file mode 100644 index 0000000000..128dcb7d4d --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/general/Refresh.json @@ -0,0 +1,23 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "xmlns": "http://www.w3.org/2000/svg", + "viewBox": "0 0 24 24", + "fill": "currentColor" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "d": "M5.46257 4.43262C7.21556 2.91688 9.5007 2 12 2C17.5228 2 22 6.47715 22 12C22 14.1361 21.3302 16.1158 20.1892 17.7406L17 12H20C20 7.58172 16.4183 4 12 4C9.84982 4 7.89777 4.84827 6.46023 6.22842L5.46257 4.43262ZM18.5374 19.5674C16.7844 21.0831 14.4993 22 12 22C6.47715 22 2 17.5228 2 12C2 9.86386 2.66979 7.88416 3.8108 6.25944L7 12H4C4 16.4183 7.58172 20 12 20C14.1502 20 16.1022 19.1517 17.5398 17.7716L18.5374 19.5674Z" + }, + "children": [] + } + ] + }, + "name": "Refresh" +} \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/line/general/Refresh.tsx b/web/app/components/base/icons/src/vender/line/general/Refresh.tsx new file mode 100644 index 0000000000..96641f1c42 --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/general/Refresh.tsx @@ -0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './Refresh.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'Refresh' + +export default Icon diff --git a/web/app/components/base/icons/src/vender/line/general/index.ts b/web/app/components/base/icons/src/vender/line/general/index.ts index c1af2e4994..b5c7a7bbc1 100644 --- a/web/app/components/base/icons/src/vender/line/general/index.ts +++ b/web/app/components/base/icons/src/vender/line/general/index.ts @@ -18,6 +18,7 @@ export { default as Menu01 } from './Menu01' export { default as Pin01 } from './Pin01' export { default as Pin02 } from './Pin02' export { default as Plus02 } from './Plus02' +export { default as Refresh } from './Refresh' export { default as Settings01 } from './Settings01' export { default as Settings04 } from './Settings04' export { default as Target04 } from './Target04' diff --git a/web/app/components/base/regenerate-btn/index.tsx 
b/web/app/components/base/regenerate-btn/index.tsx
new file mode 100644
index 0000000000..aaf0206df6
--- /dev/null
+++ b/web/app/components/base/regenerate-btn/index.tsx
@@ -0,0 +1,31 @@
+'use client'
+import { t } from 'i18next'
+import { Refresh } from '../icons/src/vender/line/general'
+import Tooltip from '@/app/components/base/tooltip'
+
+type Props = {
+  className?: string
+  onClick?: () => void
+}
+
+const RegenerateBtn = ({ className, onClick }: Props) => {
+  return (
+
+ +
onClick?.()} + style={{ + boxShadow: '0px 4px 8px -2px rgba(16, 24, 40, 0.1), 0px 2px 4px -2px rgba(16, 24, 40, 0.06)', + }} + > + +
+
+
+ ) +} + +export default RegenerateBtn diff --git a/web/app/components/datasets/create/step-two/escape.ts b/web/app/components/datasets/create/step-two/escape.ts new file mode 100644 index 0000000000..098f43bc7f --- /dev/null +++ b/web/app/components/datasets/create/step-two/escape.ts @@ -0,0 +1,18 @@ +function escape(input: string): string { + if (!input || typeof input !== 'string') + return '' + + const res = input + .replaceAll('\\', '\\\\') + .replaceAll('\0', '\\0') + .replaceAll('\b', '\\b') + .replaceAll('\f', '\\f') + .replaceAll('\n', '\\n') + .replaceAll('\r', '\\r') + .replaceAll('\t', '\\t') + .replaceAll('\v', '\\v') + .replaceAll('\'', '\\\'') + return res +} + +export default escape diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 15332b944d..f4fc58ee2a 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,5 +1,5 @@ 'use client' -import React, { useEffect, useLayoutEffect, useRef, useState } from 'react' +import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import { useBoolean } from 'ahooks' @@ -13,6 +13,8 @@ import { groupBy } from 'lodash-es' import PreviewItem, { PreviewType } from './preview-item' import LanguageSelect from './language-select' import s from './index.module.css' +import unescape from './unescape' +import escape from './escape' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { @@ -78,6 +80,8 @@ enum IndexingType { ECONOMICAL = 'economy', } +const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' + const StepTwo = ({ isSetting, documentDetail, @@ -110,8 +114,11 @@ const StepTwo = ({ const previewScrollRef = useRef(null) const [previewScrolled, setPreviewScrolled] = useState(false) const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) - const [segmentIdentifier, setSegmentIdentifier] = useState('\\n') - const [max, setMax] = useState(5000) // default chunk length + const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) + const setSegmentIdentifier = useCallback((value: string) => { + doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) + }, []) + const [max, setMax] = useState(4000) // default chunk length const [overlap, setOverlap] = useState(50) const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() @@ -183,7 +190,7 @@ const StepTwo = ({ } const resetRules = () => { if (defaultConfig) { - setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n') + setSegmentIdentifier(defaultConfig.segmentation.separator) setMax(defaultConfig.segmentation.max_tokens) setOverlap(defaultConfig.segmentation.chunk_overlap) setRules(defaultConfig.pre_processing_rules) @@ -217,7 +224,7 @@ const StepTwo = ({ const ruleObj = { pre_processing_rules: rules, segmentation: { - separator: segmentIdentifier === '\\n' ? 
'\n' : segmentIdentifier, + separator: unescape(segmentIdentifier), max_tokens: max, chunk_overlap: overlap, }, @@ -394,7 +401,7 @@ const StepTwo = ({ try { const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) const separator = res.rules.segmentation.separator - setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') + setSegmentIdentifier(separator) setMax(res.rules.segmentation.max_tokens) setOverlap(res.rules.segmentation.chunk_overlap) setRules(res.rules.pre_processing_rules) @@ -411,7 +418,7 @@ const StepTwo = ({ const separator = rules.segmentation.separator const max = rules.segmentation.max_tokens const overlap = rules.segmentation.chunk_overlap - setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') + setSegmentIdentifier(separator) setMax(max) setOverlap(overlap) setRules(rules.pre_processing_rules) @@ -616,12 +623,22 @@ const StepTwo = ({
-
{t('datasetCreation.stepTwo.separator')}
+
+ {t('datasetCreation.stepTwo.separator')} + + {t('datasetCreation.stepTwo.separatorTip')} +
+ } + /> +
setSegmentIdentifier(e.target.value)} + placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''} + value={segmentIdentifier} + onChange={e => doSetSegmentIdentifier(e.target.value)} />
@@ -803,7 +820,7 @@ const StepTwo = ({
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/datasets/create/step-two/unescape.ts b/web/app/components/datasets/create/step-two/unescape.ts new file mode 100644 index 0000000000..5c0f9e426a --- /dev/null +++ b/web/app/components/datasets/create/step-two/unescape.ts @@ -0,0 +1,54 @@ +// https://github.com/iamakulov/unescape-js/blob/master/src/index.js + +/** + * \\ - matches the backslash which indicates the beginning of an escape sequence + * ( + * u\{([0-9A-Fa-f]+)\} - first alternative; matches the variable-length hexadecimal escape sequence (\u{ABCD0}) + * | + * u([0-9A-Fa-f]{4}) - second alternative; matches the 4-digit hexadecimal escape sequence (\uABCD) + * | + * x([0-9A-Fa-f]{2}) - third alternative; matches the 2-digit hexadecimal escape sequence (\xA5) + * | + * ([1-7][0-7]{0,2}|[0-7]{2,3}) - fourth alternative; matches the up-to-3-digit octal escape sequence (\5 or \512) + * | + * (['"tbrnfv0\\]) - fifth alternative; matches the special escape characters (\t, \n and so on) + * | + * \U([0-9A-Fa-f]+) - sixth alternative; matches the 8-digit hexadecimal escape sequence used by python (\U0001F3B5) + * ) + */ +const jsEscapeRegex = /\\(u\{([0-9A-Fa-f]+)\}|u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([1-7][0-7]{0,2}|[0-7]{2,3})|(['"tbrnfv0\\]))|\\U([0-9A-Fa-f]{8})/g + +const usualEscapeSequences: Record = { + '0': '\0', + 'b': '\b', + 'f': '\f', + 'n': '\n', + 'r': '\r', + 't': '\t', + 'v': '\v', + '\'': '\'', + '"': '"', + '\\': '\\', +} + +const fromHex = (str: string) => String.fromCodePoint(parseInt(str, 16)) +const fromOct = (str: string) => String.fromCodePoint(parseInt(str, 8)) + +const unescape = (str: string) => { + return str.replace(jsEscapeRegex, (_, __, varHex, longHex, shortHex, octal, specialCharacter, python) => { + if (varHex !== undefined) + return fromHex(varHex) + else if (longHex !== undefined) + return fromHex(longHex) + else if (shortHex !== undefined) + return fromHex(shortHex) + else if (octal !== undefined) + return fromOct(octal) + else if (python !== undefined) + return fromHex(python) + else + return usualEscapeSequences[specialCharacter] + }) +} + +export default unescape diff --git a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx index 999f1cdf0d..1fc5b68d67 100644 --- a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx +++ b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx @@ -77,7 +77,7 @@ const ModifyRetrievalModal: FC = ({
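Stepping back to the `escape`/`unescape` pair added above for the segment separator: the input field stores and shows the escaped form, and the real control characters are only restored when the processing rule is built. A small sketch of the intended round trip (import paths as in the new files; the literal values are just examples):

```ts
import escape from './escape'
import unescape from './unescape'

// What the separator input stores and displays is the escaped text...
const displayed = escape('\n\n') // '\\n\\n', rendered as \n\n in the field

// ...and what goes into segmentation.separator is the unescaped original.
const separator = unescape(displayed) // '\n\n'
```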
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 0f6bdd0a59..15b8abc242 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -245,7 +245,7 @@ const Form = () => {
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/workflow/panel/chat-record/index.tsx b/web/app/components/workflow/panel/chat-record/index.tsx index afd20b7358..1bcfd6474d 100644 --- a/web/app/components/workflow/panel/chat-record/index.tsx +++ b/web/app/components/workflow/panel/chat-record/index.tsx @@ -2,7 +2,6 @@ import { memo, useCallback, useEffect, - useMemo, useState, } from 'react' import { RiCloseLine } from '@remixicon/react' @@ -17,50 +16,70 @@ import type { ChatItem } from '@/app/components/base/chat/types' import { fetchConversationMessages } from '@/service/debug' import { useStore as useAppStore } from '@/app/components/app/store' import Loading from '@/app/components/base/loading' +import { UUID_NIL } from '@/app/components/base/chat/constants' + +function appendQAToChatList(newChatList: ChatItem[], item: any) { + newChatList.push({ + id: item.id, + content: item.answer, + feedback: item.feedback, + isAnswer: true, + citation: item.metadata?.retriever_resources, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + workflow_run_id: item.workflow_run_id, + }) + newChatList.push({ + id: `question-${item.id}`, + content: item.query, + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + }) +} + +function getFormattedChatList(messages: any[]) { + const newChatList: ChatItem[] = [] + let nextMessageId = null + for (const item of messages) { + if (!item.parent_message_id) { + appendQAToChatList(newChatList, item) + break + } + + if (!nextMessageId) { + appendQAToChatList(newChatList, item) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(newChatList, item) + nextMessageId = item.parent_message_id + } + } + } + return newChatList.reverse() +} const ChatRecord = () => { const [fetched, setFetched] = useState(false) - const [chatList, setChatList] = useState([]) + const [chatList, setChatList] = useState([]) const appDetail = useAppStore(s => s.appDetail) const workflowStore = useWorkflowStore() const { handleLoadBackupDraft } = useWorkflowRun() const historyWorkflowData = useStore(s => s.historyWorkflowData) const currentConversationID = historyWorkflowData?.conversation_id - const chatMessageList = useMemo(() => { - const res: ChatItem[] = [] - if (chatList.length) { - chatList.forEach((item: any) => { - res.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - res.push({ - id: item.id, - content: item.answer, - feedback: item.feedback, - isAnswer: true, - citation: item.metadata?.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - workflow_run_id: item.workflow_run_id, - }) - }) - } - return res - }, [chatList]) - const handleFetchConversationMessages = useCallback(async () => { if (appDetail && currentConversationID) { try { setFetched(false) const res = await fetchConversationMessages(appDetail.id, currentConversationID) - setFetched(true) - setChatList((res as any).data) + setChatList(getFormattedChatList((res as any).data)) } catch (e) { - + console.error(e) + } + finally { + setFetched(true) } } }, [appDetail, currentConversationID]) @@ -101,7 +120,7 @@ const ChatRecord = () => { config={{ supportCitationHitInfo: true, } as any} - chatList={chatMessageList} + chatList={chatList} 
chatContainerClassName='px-4' chatContainerInnerClassName='pt-6 w-full max-w-full mx-auto' chatFooterClassName='px-4 rounded-b-2xl' diff --git a/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx b/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx index a7dd607e22..86519af603 100644 --- a/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx +++ b/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx @@ -18,7 +18,7 @@ import ConversationVariableModal from './conversation-variable-modal' import { useChat } from './hooks' import type { ChatWrapperRefType } from './index' import Chat from '@/app/components/base/chat/chat' -import type { OnSend } from '@/app/components/base/chat/types' +import type { ChatItem, OnSend } from '@/app/components/base/chat/types' import { useFeaturesStore } from '@/app/components/base/features/hooks' import { fetchSuggestedQuestions, @@ -58,6 +58,8 @@ const ChatWrapper = forwardRef(({ showConv const { conversationId, chatList, + chatListRef, + handleUpdateChatList, handleStop, isResponding, suggestedQuestions, @@ -73,19 +75,42 @@ const ChatWrapper = forwardRef(({ showConv taskId => stopChatMessageResponding(appDetail!.id, taskId), ) - const doSend = useCallback((query, files) => { + const doSend = useCallback((query, files, last_answer) => { + const lastAnswer = chatListRef.current.at(-1) + handleSend( { query, files, inputs: workflowStore.getState().inputs, conversation_id: conversationId, + parent_message_id: last_answer?.id || (lastAnswer + ? lastAnswer.isOpeningStatement + ? null + : lastAnswer.id + : null), }, { onGetSuggestedQuestions: (messageId, getAbortController) => fetchSuggestedQuestions(appDetail!.id, messageId, getAbortController), }, ) - }, [conversationId, handleSend, workflowStore, appDetail]) + }, [chatListRef, conversationId, handleSend, workflowStore, appDetail]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? 
undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) useImperativeHandle(ref, () => { return { @@ -107,6 +132,7 @@ const ChatWrapper = forwardRef(({ showConv chatFooterClassName='px-4 rounded-bl-2xl' chatFooterInnerClassName='pb-4 w-full max-w-full mx-auto' onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={( <> diff --git a/web/app/components/workflow/panel/debug-and-preview/hooks.ts b/web/app/components/workflow/panel/debug-and-preview/hooks.ts index 51a018bcb1..cad76a4490 100644 --- a/web/app/components/workflow/panel/debug-and-preview/hooks.ts +++ b/web/app/components/workflow/panel/debug-and-preview/hooks.ts @@ -387,6 +387,8 @@ export const useChat = ( return { conversationId: conversationId.current, chatList, + chatListRef, + handleUpdateChatList, handleSend, handleStop, handleRestart, diff --git a/web/app/signin/normalForm.tsx b/web/app/signin/normalForm.tsx index 7f23c7d22e..816df8007d 100644 --- a/web/app/signin/normalForm.tsx +++ b/web/app/signin/normalForm.tsx @@ -217,6 +217,7 @@ const NormalForm = () => { autoComplete="email" placeholder={t('login.emailPlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm'} + tabIndex={1} />
@@ -241,6 +242,7 @@ const NormalForm = () => { autoComplete="current-password" placeholder={t('login.passwordPlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'} + tabIndex={2} />
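Finally, both chat wrappers touched by this diff (the embedded chatbot and the workflow debug-and-preview panel) share the same regenerate shape. A hedged sketch of that pattern, annotated with comments and using the names from the hunks above (`ChatItem`, `handleUpdateChatList`, `doSend`):

```ts
const doRegenerate = useCallback((chatItem: ChatItem) => {
  // locate the answer being regenerated
  const index = chatList.findIndex(item => item.id === chatItem.id)
  if (index === -1)
    return

  // drop that answer and everything after it
  const prevMessages = chatList.slice(0, index)
  // the question right before it is resent...
  const question = prevMessages.pop()
  // ...and the answer before that question becomes the parent of the new message
  const lastAnswer = prevMessages.at(-1)

  if (!question)
    return

  handleUpdateChatList(prevMessages)
  // opening statements are not persisted messages, so they cannot serve as a parent
  doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer)
}, [chatList, handleUpdateChatList, doSend])
```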