Refactoring large integers to improve readability (#2636)

### What problem does this PR solve?

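Refactoring large integers: in `queue_tasks`, the literals `1000000000` and `100000` are rewritten as `10 ** 9` and `10 ** 5`, the three separate branches that set `page_size` to that maximum (parser `one`, parser `knowledge_graph`, and `not do_layout`) are merged into a single condition, and the `page_ranges` fallback is collapsed into one `or` expression. Type hints are added to the `queue_tasks` signature and the unneeded `nonlocal doc` declaration in `new_task` is removed. No behavior change is intended.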

### Type of change

- [x] Refactoring
This commit is contained in:
yqkcn 2024-09-29 10:17:42 +08:00 committed by GitHub
parent 604061c4a5
commit 57237634f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -133,9 +133,8 @@ class TaskService(CommonService):
cls.model.id == id).execute() cls.model.id == id).execute()
def queue_tasks(doc, bucket, name): def queue_tasks(doc: dict, bucket: str, name: str):
def new_task(): def new_task():
nonlocal doc
return { return {
"id": get_uuid(), "id": get_uuid(),
"doc_id": doc["id"] "doc_id": doc["id"]
@ -149,15 +148,9 @@ def queue_tasks(doc, bucket, name):
page_size = doc["parser_config"].get("task_page_size", 12) page_size = doc["parser_config"].get("task_page_size", 12)
if doc["parser_id"] == "paper": if doc["parser_id"] == "paper":
page_size = doc["parser_config"].get("task_page_size", 22) page_size = doc["parser_config"].get("task_page_size", 22)
if doc["parser_id"] == "one": if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
page_size = 1000000000 page_size = 10 ** 9
if doc["parser_id"] == "knowledge_graph": page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
page_size = 1000000000
if not do_layout:
page_size = 1000000000
page_ranges = doc["parser_config"].get("pages")
if not page_ranges:
page_ranges = [(1, 100000)]
for s, e in page_ranges: for s, e in page_ranges:
s -= 1 s -= 1
s = max(0, s) s = max(0, s)
@ -170,8 +163,7 @@ def queue_tasks(doc, bucket, name):
elif doc["parser_id"] == "table": elif doc["parser_id"] == "table":
file_bin = STORAGE_IMPL.get(bucket, name) file_bin = STORAGE_IMPL.get(bucket, name)
rn = RAGFlowExcelParser.row_number( rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
doc["name"], file_bin)
for i in range(0, rn, 3000): for i in range(0, rn, 3000):
task = new_task() task = new_task()
task["from_page"] = i task["from_page"] = i