fix: optimize unique document filtering with set (#10082)

This commit is contained in:
omr 2024-10-31 17:32:58 +09:00 committed by GitHub
parent 05d9adeb99
commit 11ca1bec0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -27,16 +27,15 @@ class RerankModelRunner(BaseRerankRunner):
:return:
"""
docs = []
doc_id = []
doc_id = set()
unique_documents = []
dify_documents = [item for item in documents if item.provider == "dify"]
external_documents = [item for item in documents if item.provider == "external"]
for document in dify_documents:
if document.metadata["doc_id"] not in doc_id:
doc_id.append(document.metadata["doc_id"])
for document in documents:
if document.provider == "dify" and document.metadata["doc_id"] not in doc_id:
doc_id.add(document.metadata["doc_id"])
docs.append(document.page_content)
unique_documents.append(document)
for document in external_documents:
elif document.provider == "external":
if document not in unique_documents:
docs.append(document.page_content)
unique_documents.append(document)