From 43cf32194253303ff0312470d4983acd8d5dcbe9 Mon Sep 17 00:00:00 2001
From: xintoteai <160626233+xintoteai@users.noreply.github.com>
Date: Thu, 10 Apr 2025 04:17:45 -0700
Subject: [PATCH] Added similarity scores in reference chunks (#6918)

- Return three similarity scores (`similarity`, `vector_similarity`, and
`term_similarity`) in the chat completion's `reference` field. This gives
users more transparency and added flexibility to display or rerank the
references when needed.

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
---
 rag/prompts.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rag/prompts.py b/rag/prompts.py
index 7433d404..489ead2f 100644
--- a/rag/prompts.py
+++ b/rag/prompts.py
@@ -39,7 +39,10 @@ def chunks_format(reference):
         "dataset_id": get_value(chunk, "kb_id", "dataset_id"),
         "image_id": get_value(chunk, "image_id", "img_id"),
         "positions": get_value(chunk, "positions", "position_int"),
-        "url": chunk.get("url")
+        "url": chunk.get("url"),
+        "similarity": chunk.get("similarity"),
+        "vector_similarity": chunk.get("vector_similarity"),
+        "term_similarity": chunk.get("term_similarity"),
     } for chunk in reference.get("chunks", [])]