From e8ad0339a3890fd7bc4a20a9517aab681f6f7b35 Mon Sep 17 00:00:00 2001
From: LXM
Date: Sat, 22 Jun 2024 12:25:23 +0800
Subject: [PATCH] fix: tongyi json output (#5396)

---
 .../model_providers/tongyi/llm/llm.py         | 30 +++----
 .../tongyi/test_response_format.py            | 84 +++++++++++++++++++
 2 files changed, 96 insertions(+), 18 deletions(-)
 create mode 100644 api/tests/integration_tests/model_runtime/tongyi/test_response_format.py

diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index 3d0a80144c..41d8f37aaf 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -18,7 +18,7 @@ from dashscope.common.error import (
 )
 
 from core.model_runtime.callbacks.base_callback import Callback
-from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
     ImagePromptMessageContent,
@@ -82,6 +82,7 @@ if you are not sure about the structure.
 
 {{instructions}}
+You should also complete the text started with ``` but not tell ``` directly.
 """
 
         code_block = model_parameters.get("response_format", "")
@@ -113,21 +114,17 @@ if you are not sure about the structure.
         # insert the system message
         prompt_messages.insert(0, SystemPromptMessage(
             content=block_prompts
-            .replace("{{instructions}}", f"Please output a valid {code_block} object.")
+            .replace("{{instructions}}", f"Please output a valid {code_block} with markdown codeblocks.")
         ))
 
-        mode = self.get_model_mode(model, credentials)
-        if mode == LLMMode.CHAT:
-            if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
-                # add ```JSON\n to the last message
-                prompt_messages[-1].content += f"\n```{code_block}\n"
-            else:
-                # append a user message
-                prompt_messages.append(UserPromptMessage(
-                    content=f"```{code_block}\n"
-                ))
+        if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
+            # add ```JSON\n to the last message
+            prompt_messages[-1].content += f"\n```{code_block}\n"
         else:
-            prompt_messages.append(AssistantPromptMessage(content=f"```{code_block}\n"))
+            # append a user message
+            prompt_messages.append(UserPromptMessage(
+                content=f"```{code_block}\n"
+            ))
 
         response = self._invoke(
             model=model,
@@ -243,11 +240,8 @@ if you are not sure about the structure.
             response = MultiModalConversation.call(**params, stream=stream)
         else:
-            if mode == LLMMode.CHAT:
-                params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            else:
-                params['prompt'] = prompt_messages[0].content.rstrip()
-
+            # nothing different between chat model and completion model in tongyi
+            params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
             response = Generation.call(**params,
                                        result_format='message',
                                        stream=stream)
diff --git a/api/tests/integration_tests/model_runtime/tongyi/test_response_format.py b/api/tests/integration_tests/model_runtime/tongyi/test_response_format.py
new file mode 100644
index 0000000000..1b0a38d5d1
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/tongyi/test_response_format.py
@@ -0,0 +1,84 @@
+import json
+import os
+from collections.abc import Generator
+
+from core.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import AssistantPromptMessage, UserPromptMessage
+from core.model_runtime.model_providers.tongyi.llm.llm import TongyiLargeLanguageModel
+
+
+def test_invoke_model_with_json_response():
+    """
+    Test the invocation of a model with JSON response.
+    """
+    model_list = [
+        "qwen-max-0403",
+        "qwen-max-1201",
+        "qwen-max-longcontext",
+        "qwen-max",
+        "qwen-plus-chat",
+        "qwen-plus",
+        "qwen-turbo-chat",
+        "qwen-turbo",
+    ]
+    for model_name in model_list:
+        print("testing model: ", model_name)
+        invoke_model_with_json_response(model_name)
+
+
+def invoke_model_with_json_response(model_name="qwen-max-0403"):
+    """
+    Method to invoke the model with JSON response format.
+    Args:
+        model_name (str): The name of the model to invoke. Defaults to "qwen-max-0403".
+
+    Returns:
+        None
+    """
+    model = TongyiLargeLanguageModel()
+
+    response = model.invoke(
+        model=model_name,
+        credentials={
+            'dashscope_api_key': os.environ.get('TONGYI_DASHSCOPE_API_KEY')
+        },
+        prompt_messages=[
+            UserPromptMessage(
+                content='output json data with format `{"data": "test", "code": 200, "msg": "success"}'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.5,
+            'max_tokens': 50,
+            'response_format': 'JSON',
+        },
+        stream=True,
+        user="abc-123"
+    )
+    print("=====================================")
+    print(response)
+    assert isinstance(response, Generator)
+    output = ""
+    for chunk in response:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        output += chunk.delta.message.content
+    assert is_json(output)
+
+
+def is_json(s):
+    """
+    Check if a string is a valid JSON.
+
+    Args:
+        s (str): The string to check.
+
+    Returns:
+        bool: True if the string is a valid JSON, False otherwise.
+    """
+    try:
+        json.loads(s)
+    except ValueError:
+        return False
+    return True
\ No newline at end of file