Use k_reranker for the result merge as well, and add a special-case sort order for ChromaDB

This commit is contained in:
Marko Henning 2025-03-18 16:25:24 +01:00
parent f13948d805
commit ba676b7ed6

View File

@ -146,7 +146,10 @@ def query_doc_with_hybrid_search(
# retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
if k < k_reranker: if k < k_reranker:
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True) if VECTOR_DB == "chroma":
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=False)
else:
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
sorted_items = sorted_items[:k] sorted_items = sorted_items[:k]
distances, documents, metadatas = map(list, zip(*sorted_items)) distances, documents, metadatas = map(list, zip(*sorted_items))
result = { result = {
@ -310,9 +313,9 @@ def query_collection_with_hybrid_search(
if VECTOR_DB == "chroma": if VECTOR_DB == "chroma":
# Chroma uses unconventional cosine similarity, so we don't need to reverse the results # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
# https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
return merge_and_sort_query_results(results, k=k, reverse=False) return merge_and_sort_query_results(results, k=k_reranker, reverse=False)
else: else:
return merge_and_sort_query_results(results, k=k, reverse=True) return merge_and_sort_query_results(results, k=k_reranker, reverse=True)
def get_embedding_function( def get_embedding_function(