From ed11be23bf8c41f78276b8d76cf151de0a11b988 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BB=BB=E5=A5=87?=
 <49386598+alen08266@users.noreply.github.com>
Date: Tue, 11 Mar 2025 19:56:07 +0800
Subject: [PATCH] =?UTF-8?q?Fix:=20When=20calling=20the=20Create=20chat=20c?=
 =?UTF-8?q?ompletion=20API,=20the=20response=20data=E2=80=A6=20(#5928)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

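### What problem does this PR solve?

When calling the Create chat completion API with streaming enabled, the
response data was incoherent. The streamed delta in
`chat_completion_openai_like` is now sliced from a separate offset
(`should_split_index`), which is intended to advance by the length of each
chunk excluding a trailing `</think>`.
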
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: renqi <renqi08266@fxomail.com>
---
 api/apps/sdk/session.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 08dc23593..aeca09aaf 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -259,6 +259,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         # The choices field on the last chunk will always be an empty array [].
         def streamed_response_generator(chat_id, dia, msg):
             token_used = 0
+            should_split_index = 0
             response = {
                 "id": f"chatcmpl-{chat_id}",
                 "choices": [
@@ -284,8 +285,19 @@ def chat_completion_openai_like(tenant_id, chat_id):
             try:
                 for ans in chat(dia, msg, True):
                     answer = ans["answer"]
-                    incremental = answer[token_used:]
+                    incremental = answer[should_split_index:]
                     token_used += len(incremental)
+
+                    """
+                    bugfix: When calling the Create chat completion API, the response data is incoherent.
+                    bug code: token_used += len(incremental)
+                    fix author: 任奇
+                    """
+                    if incremental.endswith("</think>"):
+                        response_data_len = len(incremental.rstrip("</think>"))
+                    else:
+                        response_data_len = len(incremental)
+                    should_split_index += response_data_len
                     response["choices"][0]["delta"]["content"] = incremental
                     yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n"
             except Exception as e: