mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 07:25:51 +08:00
fix: improve WeightRerankRunner run logic to use O(1) lookups and delete unused code (#10849)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
bc1013dacf
commit
58a9d9eb9a
@ -36,23 +36,21 @@ class WeightRerankRunner(BaseRerankRunner):
|
|||||||
|
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
docs = []
|
|
||||||
doc_id = []
|
|
||||||
unique_documents = []
|
unique_documents = []
|
||||||
|
doc_id = set()
|
||||||
for document in documents:
|
for document in documents:
|
||||||
if document.metadata["doc_id"] not in doc_id:
|
doc_id = document.metadata.get("doc_id")
|
||||||
doc_id.append(document.metadata["doc_id"])
|
if doc_id not in doc_id:
|
||||||
docs.append(document.page_content)
|
doc_id.add(doc_id)
|
||||||
unique_documents.append(document)
|
unique_documents.append(document)
|
||||||
|
|
||||||
documents = unique_documents
|
documents = unique_documents
|
||||||
|
|
||||||
rerank_documents = []
|
|
||||||
query_scores = self._calculate_keyword_score(query, documents)
|
query_scores = self._calculate_keyword_score(query, documents)
|
||||||
|
|
||||||
query_vector_scores = self._calculate_cosine(self.tenant_id, query, documents, self.weights.vector_setting)
|
query_vector_scores = self._calculate_cosine(self.tenant_id, query, documents, self.weights.vector_setting)
|
||||||
|
|
||||||
|
rerank_documents = []
|
||||||
for document, query_score, query_vector_score in zip(documents, query_scores, query_vector_scores):
|
for document, query_score, query_vector_score in zip(documents, query_scores, query_vector_scores):
|
||||||
# format document
|
|
||||||
score = (
|
score = (
|
||||||
self.weights.vector_setting.vector_weight * query_vector_score
|
self.weights.vector_setting.vector_weight * query_vector_score
|
||||||
+ self.weights.keyword_setting.keyword_weight * query_score
|
+ self.weights.keyword_setting.keyword_weight * query_score
|
||||||
@ -61,7 +59,8 @@ class WeightRerankRunner(BaseRerankRunner):
|
|||||||
continue
|
continue
|
||||||
document.metadata["score"] = score
|
document.metadata["score"] = score
|
||||||
rerank_documents.append(document)
|
rerank_documents.append(document)
|
||||||
rerank_documents = sorted(rerank_documents, key=lambda x: x.metadata["score"], reverse=True)
|
|
||||||
|
rerank_documents.sort(key=lambda x: x.metadata["score"], reverse=True)
|
||||||
return rerank_documents[:top_n] if top_n else rerank_documents
|
return rerank_documents[:top_n] if top_n else rerank_documents
|
||||||
|
|
||||||
def _calculate_keyword_score(self, query: str, documents: list[Document]) -> list[float]:
|
def _calculate_keyword_score(self, query: str, documents: list[Document]) -> list[float]:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user