diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index e068285fe..bb6303367 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -277,28 +277,27 @@ async def build_chunks(task, progress_callback): async def upload_to_minio(document, chunk): try: - async with minio_limiter: - d = copy.deepcopy(document) - d.update(chunk) - d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest() - d["create_time"] = str(datetime.now()).replace("T", " ")[:19] - d["create_timestamp_flt"] = datetime.now().timestamp() - if not d.get("image"): - _ = d.pop("image", None) - d["img_id"] = "" - docs.append(d) - return - - output_buffer = BytesIO() - if isinstance(d["image"], bytes): - output_buffer = BytesIO(d["image"]) - else: - d["image"].save(output_buffer, format='JPEG') - await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue())) - - d["img_id"] = "{}-{}".format(task["kb_id"], d["id"]) - del d["image"] + d = copy.deepcopy(document) + d.update(chunk) + d["id"] = xxhash.xxh64((chunk["content_with_weight"] + str(d["doc_id"])).encode("utf-8")).hexdigest() + d["create_time"] = str(datetime.now()).replace("T", " ")[:19] + d["create_timestamp_flt"] = datetime.now().timestamp() + if not d.get("image"): + _ = d.pop("image", None) + d["img_id"] = "" docs.append(d) + return + + output_buffer = BytesIO() + if isinstance(d["image"], bytes): + output_buffer = BytesIO(d["image"]) + else: + d["image"].save(output_buffer, format='JPEG') + async with minio_limiter: + await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue())) + d["img_id"] = "{}-{}".format(task["kb_id"], d["id"]) + del d["image"] + docs.append(d) except Exception: logging.exception( "Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))