From c981a576161a2c03553765f7e0d1f87722b67507 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Thu, 8 Aug 2024 12:25:57 +0800 Subject: [PATCH] fix: Reference markers in the context may be carried over into the next answer (#1855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assistant answers kept in the conversation context carry reference markers (e.g. `##0$$`), which are passed on to the large model; the model may then copy those markers into its new answer, producing abnormal reference markers that point at nothing. ``` {'role': 'assistant', 'content': '设置在地下或半地下空间 ##0$$。'} ``` ![image](https://github.com/user-attachments/assets/bcfdb3fc-7b54-44cb-ab70-2f9b715d06b8) ### What problem does this PR solve? Strip reference markers matching `##\d+$$` from historical message content before building the prompt sent to the LLM, so stale markers from earlier answers cannot leak into new answers and break reference resolution. ### Type of change - Bug Fix (non-breaking change which fixes an issue) --- api/db/services/dialog_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index ab43b944c..71429198a 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -168,7 +168,7 @@ def chat(dialog, messages, stream=True, **kwargs): gen_conf = dialog.llm_setting msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}] - msg.extend([{"role": m["role"], "content": m["content"]} + msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])} for m in messages if m["role"] != "system"]) used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97)) assert len(msg) >= 2, f"message_fit_in has bug: {msg}"