From 01330fa428b760bc91642edf39b136e4ab0b4a84 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 13 May 2025 19:30:05 +0800 Subject: [PATCH] Feat: let image citations be shown. (#7624) ### What problem does this PR solve? #7623 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- agent/component/retrieval.py | 2 ++ api/apps/conversation_app.py | 1 + rag/prompts.py | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/agent/component/retrieval.py b/agent/component/retrieval.py index 0a34b5d66..859d65478 100644 --- a/agent/component/retrieval.py +++ b/agent/component/retrieval.py @@ -15,6 +15,7 @@ # import json import logging +import re from abc import ABC import pandas as pd @@ -59,6 +60,7 @@ class Retrieval(ComponentBase, ABC): def _run(self, history, **kwargs): query = self.get_input() query = str(query["content"][0]) if "content" in query else "" + query = re.split(r"(USER:|ASSISTANT:)", query)[-1] kb_ids: list[str] = self._param.kb_ids or [] diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 5eaeb1925..c05710a4f 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -210,6 +210,7 @@ def completion(): "dataset_id": get_value(ck, "kb_id", "dataset_id"), "image_id": get_value(ck, "image_id", "img_id"), "positions": get_value(ck, "positions", "position_int"), + "doc_type": get_value(ck, "doc_type_kwd", "doc_type_kwd"), } for ck in ref.get("chunks", []) ] diff --git a/rag/prompts.py b/rag/prompts.py index 7d061b810..4a61de557 100644 --- a/rag/prompts.py +++ b/rag/prompts.py @@ -117,7 +117,9 @@ def kb_prompt(kbinfos, max_tokens): doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) for i, ck in enumerate(kbinfos["chunks"][:chunks_num]): - doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"]) + cnt = f"---\nID: {i}\n" + (f"URL: {ck['url']}\n" if "url" in ck else "") + cnt += 
ck["content_with_weight"] + doc2chunks[ck["docnm_kwd"]]["chunks"].append(cnt) doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) knowledges = []