mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-03 10:53:59 +08:00
Feat: delete useless image blobs when the task executor meets edge cases (#7727)
### What problem does this PR solve?

Delete useless image blobs when the task executor meets edge cases.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
76b278af8e
commit
e3e7c7ddaa
@ -577,6 +577,16 @@ async def do_handle_task(task):
|
|||||||
start_ts = timer()
|
start_ts = timer()
|
||||||
doc_store_result = ""
|
doc_store_result = ""
|
||||||
es_bulk_size = 4
|
es_bulk_size = 4
|
||||||
|
|
||||||
|
async def delete_image(kb_id, chunk_id):
    """Delete the stored image blob of a single chunk.

    Args:
        kb_id: First argument forwarded to ``STORAGE_IMPL.delete``; the
            caller passes the task's dataset id here.
        chunk_id: Second argument forwarded to ``STORAGE_IMPL.delete``;
            also included in the log message on failure.

    Raises:
        Exception: Any error raised by the storage call is logged with the
            task's location and name, then re-raised unchanged.
    """
    try:
        # minio_limiter bounds how many storage operations run at once
        # (presumably a trio.CapacityLimiter -- confirm at its definition).
        async with minio_limiter:
            # STORAGE_IMPL.delete is invoked as a plain synchronous call, so
            # run it in a worker thread: calling it inline would block the
            # Trio event loop for the whole storage round-trip and make the
            # nursery fan-out over chunk ids execute one deletion at a time.
            await trio.to_thread.run_sync(STORAGE_IMPL.delete, kb_id, chunk_id)
    except Exception:
        logging.exception(
            "Deleting image of chunk {}/{}/{} got exception".format(task["location"], task["name"], chunk_id))
        raise
|
||||||
|
|
||||||
for b in range(0, len(chunks), es_bulk_size):
|
for b in range(0, len(chunks), es_bulk_size):
|
||||||
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(task_tenant_id), task_dataset_id))
|
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + es_bulk_size], search.index_name(task_tenant_id), task_dataset_id))
|
||||||
if b % 128 == 0:
|
if b % 128 == 0:
|
||||||
@ -592,7 +602,11 @@ async def do_handle_task(task):
|
|||||||
except DoesNotExist:
|
except DoesNotExist:
|
||||||
logging.warning(f"do_handle_task update_chunk_ids failed since task {task['id']} is unknown.")
|
logging.warning(f"do_handle_task update_chunk_ids failed since task {task['id']} is unknown.")
|
||||||
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"id": chunk_ids}, search.index_name(task_tenant_id), task_dataset_id))
|
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"id": chunk_ids}, search.index_name(task_tenant_id), task_dataset_id))
|
||||||
|
async with trio.open_nursery() as nursery:
|
||||||
|
for chunk_id in chunk_ids:
|
||||||
|
nursery.start_soon(delete_image, task_dataset_id, chunk_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
logging.info("Indexing doc({}), page({}-{}), chunks({}), elapsed: {:.2f}".format(task_document_name, task_from_page,
|
logging.info("Indexing doc({}), page({}-{}), chunks({}), elapsed: {:.2f}".format(task_document_name, task_from_page,
|
||||||
task_to_page, len(chunks),
|
task_to_page, len(chunks),
|
||||||
timer() - start_ts))
|
timer() - start_ts))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user