From 91e1ff5e30e3dca7519ffe59c26d8c060c5dbf3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?=
Date: Wed, 4 Dec 2024 15:14:30 +0800
Subject: [PATCH] chore: improve zhipu LLM (#11321)

---
 .../zhipuai/llm/glm-4-0520.yaml               |   1 +
 .../zhipuai/llm/glm-4-air.yaml                |   1 +
 .../zhipuai/llm/glm-4-airx.yaml               |   1 +
 .../zhipuai/llm/glm-4-flash.yaml              |   1 +
 .../zhipuai/llm/glm-4-flashx.yaml             |   1 +
 .../zhipuai/llm/glm_3_turbo.yaml              |   1 +
 .../model_providers/zhipuai/llm/glm_4.yaml    |   1 +
 .../zhipuai/llm/glm_4_long.yaml               |   2 +-
 .../zhipuai/llm/glm_4_plus.yaml               |   1 +
 .../model_providers/zhipuai/llm/glm_4v.yaml   |   1 +
 .../zhipuai/llm/glm_4v_plus.yaml              |   1 +
 .../model_providers/zhipuai/llm/llm.py        | 127 +++++-------------
 12 files changed, 41 insertions(+), 98 deletions(-)

diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index 7c8da51d1b..035d9881eb 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index 7a7b4b0892..c3ee76141d 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index 09ad842801..1926db7ac3 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 8192
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index aee82a0602..e54b5de4a1 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
index 40ff7609c7..724fe48909 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index 791a77ba15..fa5b1e1fe9 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index 13ed1e49c9..e1eb13df3d 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index badcee22db..c0c4e04d37 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 10240
+  context_size: 1048576
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index e2f785e1bc..c4f26f8ba9 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 3baa298300..0d99f89cb8 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -4,6 +4,7 @@ label:
 model_type: llm
 model_properties:
   mode: chat
+  context_size: 2048
 features:
   - vision
 parameter_rules:
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index dbda18b888..5cd0e16b0e 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -4,6 +4,7 @@ label:
 model_type: llm
 model_properties:
   mode: chat
+  context_size: 8192
 features:
   - vision
   - video
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
index eddb94aba3..e0601d681c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
@@ -22,18 +22,6 @@ from core.model_runtime.model_providers.__base.large_language_model import Large
 from core.model_runtime.model_providers.zhipuai._common import _CommonZhipuaiAI
 from core.model_runtime.utils import helper
 
-GLM_JSON_MODE_PROMPT = """You should always follow the instructions and output a valid JSON object.
-The structure of the JSON object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
-if you are not sure about the structure.
-
-And you should always end the block with a "```" to indicate the end of the JSON object.
-
-
-{{instructions}}
-
-
-```JSON"""  # noqa: E501
-
 
 class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
     def _invoke(
@@ -64,42 +52,8 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
         credentials_kwargs = self._to_credential_kwargs(credentials)
 
         # invoke model
-        # stop = stop or []
-        # self._transform_json_prompts(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
         return self._generate(model, credentials_kwargs, prompt_messages, model_parameters, tools, stop, stream, user)
 
-    # def _transform_json_prompts(self, model: str, credentials: dict,
-    #                             prompt_messages: list[PromptMessage], model_parameters: dict,
-    #                             tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
-    #                             stream: bool = True, user: str | None = None) \
-    #         -> None:
-    #     """
-    #     Transform json prompts to model prompts
-    #     """
-    #     if "}\n\n" not in stop:
-    #         stop.append("}\n\n")
-
-    #     # check if there is a system message
-    #     if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
-    #         # override the system message
-    #         prompt_messages[0] = SystemPromptMessage(
-    #             content=GLM_JSON_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content)
-    #         )
-    #     else:
-    #         # insert the system message
-    #         prompt_messages.insert(0, SystemPromptMessage(
-    #             content=GLM_JSON_MODE_PROMPT.replace("{{instructions}}", "Please output a valid JSON object.")
-    #         ))
-    #     # check if the last message is a user message
-    #     if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
-    #         # add ```JSON\n to the last message
-    #         prompt_messages[-1].content += "\n```JSON\n"
-    #     else:
-    #         # append a user message
-    #         prompt_messages.append(UserPromptMessage(
-    #             content="```JSON\n"
-    #         ))
-
     def get_num_tokens(
         self,
         model: str,
@@ -170,7 +124,7 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
         :return: full response or stream response chunk generator result
         """
         extra_model_kwargs = {}
-        # request to glm-4v-plus with stop words will always response "finish_reason":"network_error"
+        # request to glm-4v-plus with stop words will always respond "finish_reason":"network_error"
         if stop and model != "glm-4v-plus":
             extra_model_kwargs["stop"] = stop
 
@@ -186,7 +140,7 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
         # resolve zhipuai model not support system message and user message, assistant message must be in sequence
         new_prompt_messages: list[PromptMessage] = []
         for prompt_message in prompt_messages:
-            copy_prompt_message = prompt_message.copy()
+            copy_prompt_message = prompt_message.model_copy()
             if copy_prompt_message.role in {PromptMessageRole.USER, PromptMessageRole.SYSTEM, PromptMessageRole.TOOL}:
                 if isinstance(copy_prompt_message.content, list):
                     # check if model is 'glm-4v'
@@ -238,59 +192,38 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
             params = self._construct_glm_4v_parameter(model, new_prompt_messages, model_parameters)
         else:
             params = {"model": model, "messages": [], **model_parameters}
-            # glm model
-            if not model.startswith("chatglm"):
-                for prompt_message in new_prompt_messages:
-                    if prompt_message.role == PromptMessageRole.TOOL:
+            for prompt_message in new_prompt_messages:
+                if prompt_message.role == PromptMessageRole.TOOL:
+                    params["messages"].append(
+                        {
+                            "role": "tool",
+                            "content": prompt_message.content,
+                            "tool_call_id": prompt_message.tool_call_id,
+                        }
+                    )
+                elif isinstance(prompt_message, AssistantPromptMessage):
+                    if prompt_message.tool_calls:
                         params["messages"].append(
                             {
-                                "role": "tool",
+                                "role": "assistant",
                                 "content": prompt_message.content,
-                                "tool_call_id": prompt_message.tool_call_id,
+                                "tool_calls": [
+                                    {
+                                        "id": tool_call.id,
+                                        "type": tool_call.type,
+                                        "function": {
+                                            "name": tool_call.function.name,
+                                            "arguments": tool_call.function.arguments,
+                                        },
+                                    }
+                                    for tool_call in prompt_message.tool_calls
+                                ],
                             }
                         )
-                    elif isinstance(prompt_message, AssistantPromptMessage):
-                        if prompt_message.tool_calls:
-                            params["messages"].append(
-                                {
-                                    "role": "assistant",
-                                    "content": prompt_message.content,
-                                    "tool_calls": [
-                                        {
-                                            "id": tool_call.id,
-                                            "type": tool_call.type,
-                                            "function": {
-                                                "name": tool_call.function.name,
-                                                "arguments": tool_call.function.arguments,
-                                            },
-                                        }
-                                        for tool_call in prompt_message.tool_calls
-                                    ],
-                                }
-                            )
-                        else:
-                            params["messages"].append({"role": "assistant", "content": prompt_message.content})
                     else:
-                        params["messages"].append(
-                            {"role": prompt_message.role.value, "content": prompt_message.content}
-                        )
-            else:
-                # chatglm model
-                for prompt_message in new_prompt_messages:
-                    # merge system message to user message
-                    if prompt_message.role in {
-                        PromptMessageRole.SYSTEM,
-                        PromptMessageRole.TOOL,
-                        PromptMessageRole.USER,
-                    }:
-                        if len(params["messages"]) > 0 and params["messages"][-1]["role"] == "user":
-                            params["messages"][-1]["content"] += "\n\n" + prompt_message.content
-                        else:
-                            params["messages"].append({"role": "user", "content": prompt_message.content})
-                    else:
-                        params["messages"].append(
-                            {"role": prompt_message.role.value, "content": prompt_message.content}
-                        )
+                        params["messages"].append({"role": "assistant", "content": prompt_message.content})
+                else:
+                    params["messages"].append({"role": prompt_message.role.value, "content": prompt_message.content})
 
         if tools and len(tools) > 0:
             params["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools]
@@ -406,7 +339,7 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
         Handle llm stream response
 
         :param model: model name
-        :param response: response
+        :param responses: response
         :param prompt_messages: prompt messages
         :return: llm response chunk generator result
         """
@@ -505,7 +438,7 @@ class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
         if tools and len(tools) > 0:
             text += "\n\nTools:"
             for tool in tools:
-                text += f"\n{tool.json()}"
+                text += f"\n{tool.model_dump_json()}"
 
         # trim off the trailing ' ' that might come from the "Assistant: "
         return text.rstrip()
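
Besides removing the dead JSON-mode prompt and the legacy chatglm branch, the llm.py hunks above migrate two Pydantic v1 calls to their v2 equivalents: prompt_message.copy() becomes prompt_message.model_copy(), and tool.json() becomes tool.model_dump_json(). A minimal sketch of the difference, assuming only that the objects are Pydantic v2 BaseModel subclasses (the Msg class below is illustrative, not Dify's actual PromptMessage):

    from pydantic import BaseModel

    class Msg(BaseModel):
        role: str
        content: str

    msg = Msg(role="user", content="hello")

    # Pydantic v1 spellings, deprecated in v2 (they emit a DeprecationWarning):
    #   clone = msg.copy()
    #   payload = msg.json()

    # Pydantic v2 spellings used by this patch:
    clone = msg.model_copy()         # shallow copy of the model instance
    payload = msg.model_dump_json()  # serialize the fields to a JSON string

Both v2 methods are defined on every pydantic.BaseModel subclass, so PromptMessage and PromptMessageTool pick them up without further changes.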