Feat: let image citations be shown. (#7624)

### What problem does this PR solve?

#7623

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu 2025-05-13 19:30:05 +08:00 committed by GitHub
parent b4cc37f3c1
commit 01330fa428
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 6 additions and 1 deletions

View File

@ -15,6 +15,7 @@
# #
import json import json
import logging import logging
import re
from abc import ABC from abc import ABC
import pandas as pd import pandas as pd
@ -59,6 +60,7 @@ class Retrieval(ComponentBase, ABC):
def _run(self, history, **kwargs): def _run(self, history, **kwargs):
query = self.get_input() query = self.get_input()
query = str(query["content"][0]) if "content" in query else "" query = str(query["content"][0]) if "content" in query else ""
query = re.split(r"(USER:|ASSISTANT:)", query)[-1]
kb_ids: list[str] = self._param.kb_ids or [] kb_ids: list[str] = self._param.kb_ids or []

View File

@ -210,6 +210,7 @@ def completion():
"dataset_id": get_value(ck, "kb_id", "dataset_id"), "dataset_id": get_value(ck, "kb_id", "dataset_id"),
"image_id": get_value(ck, "image_id", "img_id"), "image_id": get_value(ck, "image_id", "img_id"),
"positions": get_value(ck, "positions", "position_int"), "positions": get_value(ck, "positions", "position_int"),
"doc_type": get_value(ck, "doc_type_kwd", "doc_type_kwd"),
} }
for ck in ref.get("chunks", []) for ck in ref.get("chunks", [])
] ]

View File

@ -117,7 +117,9 @@ def kb_prompt(kbinfos, max_tokens):
doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []})
for i, ck in enumerate(kbinfos["chunks"][:chunks_num]): for i, ck in enumerate(kbinfos["chunks"][:chunks_num]):
doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"]) cnt = f"---\nID: {i}\n" + (f"URL: {ck['url']}\n" if "url" in ck else "")
cnt += ck["content_with_weight"]
doc2chunks[ck["docnm_kwd"]]["chunks"].append(cnt)
doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {})
knowledges = [] knowledges = []