From 01330fa428b760bc91642edf39b136e4ab0b4a84 Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Tue, 13 May 2025 19:30:05 +0800
Subject: [PATCH] Feat: let image citations be shown. (#7624)

### What problem does this PR solve?

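Addresses #7623.

- `api/apps/conversation_app.py`: include each chunk's `doc_type` in the
  reference chunks returned by `completion()`.
- `rag/prompts.py`: start each chunk in the knowledge-base prompt with a `---`
  separator and put the `ID:` line before the optional `URL:` line.
- `agent/component/retrieval.py`: use only the text after the last `USER:` /
  `ASSISTANT:` marker as the retrieval query.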

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 agent/component/retrieval.py | 2 ++
 api/apps/conversation_app.py | 1 +
 rag/prompts.py               | 4 +++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/agent/component/retrieval.py b/agent/component/retrieval.py
index 0a34b5d66..859d65478 100644
--- a/agent/component/retrieval.py
+++ b/agent/component/retrieval.py
@@ -15,6 +15,7 @@
 #
 import json
 import logging
+import re
 from abc import ABC
 
 import pandas as pd
@@ -59,6 +60,7 @@ class Retrieval(ComponentBase, ABC):
     def _run(self, history, **kwargs):
         query = self.get_input()
         query = str(query["content"][0]) if "content" in query else ""
+        query = re.split(r"(USER:|ASSISTANT:)", query)[-1]
 
         kb_ids: list[str] = self._param.kb_ids or []
 
diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index 5eaeb1925..c05710a4f 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -210,6 +210,7 @@ def completion():
                         "dataset_id": get_value(ck, "kb_id", "dataset_id"),
                         "image_id": get_value(ck, "image_id", "img_id"),
                         "positions": get_value(ck, "positions", "position_int"),
+                        "doc_type": get_value(ck, "doc_type_kwd", "doc_type_kwd"),
                     }
                     for ck in ref.get("chunks", [])
                 ]
diff --git a/rag/prompts.py b/rag/prompts.py
index 7d061b810..4a61de557 100644
--- a/rag/prompts.py
+++ b/rag/prompts.py
@@ -117,7 +117,9 @@ def kb_prompt(kbinfos, max_tokens):
 
     doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []})
     for i, ck in enumerate(kbinfos["chunks"][:chunks_num]):
-        doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"])
+        cnt = f"---\nID: {i}\n" + (f"URL: {ck['url']}\n" if "url" in ck else "")
+        cnt += ck["content_with_weight"]
+        doc2chunks[ck["docnm_kwd"]]["chunks"].append(cnt)
         doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {})
 
     knowledges = []