index add to db when dataset updated (#588)

This commit is contained in:
Jyong 2023-07-18 15:02:33 +08:00 committed by GitHub
parent 0d3cd3b16a
commit 9f28a48a92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -44,14 +44,13 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
if dataset_documents: if dataset_documents:
# save vector index # save vector index
index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True) index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
documents = []
for dataset_document in dataset_documents: for dataset_document in dataset_documents:
# delete from vector index # delete from vector index
segments = db.session.query(DocumentSegment).filter( segments = db.session.query(DocumentSegment).filter(
DocumentSegment.document_id == dataset_document.id, DocumentSegment.document_id == dataset_document.id,
DocumentSegment.enabled == True DocumentSegment.enabled == True
) .order_by(DocumentSegment.position.asc()).all() ) .order_by(DocumentSegment.position.asc()).all()
documents = []
for segment in segments: for segment in segments:
document = Document( document = Document(
page_content=segment.content, page_content=segment.content,
@@ -65,8 +64,8 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
documents.append(document) documents.append(document)
# save vector index # save vector index
index.add_texts(documents) index.add_texts(documents)
end_at = time.perf_counter() end_at = time.perf_counter()
logging.info( logging.info(