From bc578e1e83da0e4ac313dc033aaecb8d480d9ef1 Mon Sep 17 00:00:00 2001 From: Sol <55039727+Sol-Ghf@users.noreply.github.com> Date: Tue, 27 May 2025 16:58:18 +0800 Subject: [PATCH] Removed the "USER:" prefix from the question, which was reducing the accuracy of the search (#7852) ### What problem does this PR solve? ![85784793b445e081ea1c7524b568123f](https://github.com/user-attachments/assets/88748407-ea3d-445a-9dae-8f02cfdf78f3) ![77e59b94b621b3b6fdda654104f01d1a](https://github.com/user-attachments/assets/6531c691-a625-48c4-b05f-c64f8acd7c28) ![73e91d72114b905cfa39e804cd3240a3](https://github.com/user-attachments/assets/eb9d0bb2-4aac-40d8-8444-cdcbc0835568) ![45c8a52ecf5e1603354c4d0a814ecf06](https://github.com/user-attachments/assets/d56162a4-8168-4e7f-a113-17ec258b9539) When the question starts with a "USER:" prefix, the word "user" is treated as an ordinary keyword and takes part in the search, which can recall irrelevant content and reduce search accuracy. If "user" appears frequently in your knowledge base, it can also distort relevance ranking and even recall irrelevant FAQs or documents. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [x] Performance Improvement - [ ] Other (please describe): --------- Co-authored-by: Kevin Hu --- agent/component/retrieval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/component/retrieval.py b/agent/component/retrieval.py index 859d65478..218dae969 100644 --- a/agent/component/retrieval.py +++ b/agent/component/retrieval.py @@ -96,6 +96,7 @@ class Retrieval(ComponentBase, ABC): rerank_mdl = LLMBundle(kbs[0].tenant_id, LLMType.RERANK, self._param.rerank_id) if kbs: + query = re.sub(r"^user[::\s]*", "", query, flags=re.IGNORECASE) kbinfos = settings.retrievaler.retrieval( query, embd_mdl,