Fix(api): correct document parsing progress check logic (#6318)

- Fix incorrect progress check condition that prevented re-parsing of
completed documents
- Allow parsing for documents with progress 0.0 (not started) or 1.0
(completed)
- Only block parsing for documents currently in progress (0.0 < progress
< 1.0)

Close #6312

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
hy89 2025-03-20 16:00:17 +08:00 committed by GitHub
parent f0c4b28c6b
commit 1d9ca172e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 4 deletions

View File

@@ -688,9 +688,9 @@ def parse(tenant_id, dataset_id):
continue
if not doc:
return get_error_data_result(message=f"You don't own the document {id}.")
if doc[0].progress != 0.0:
if 0.0 < doc[0].progress < 1.0:
return get_error_data_result(
"Can't stop parsing document with progress at 0 or 100"
"Can't parse document that is currently being processed"
)
info = {"run": "1", "progress": 0, "progress_msg": "", "chunk_num": 0, "token_num": 0}
DocumentService.update_by_id(id, info)

View File

@@ -205,8 +205,7 @@ class TestDocumentsParse:
res = parse_documnet(
get_http_api_auth, dataset_id, {"document_ids": document_ids}
)
assert res["code"] == 102
assert res["message"] == "Can't stop parsing document with progress at 0 or 100"
assert res["code"] == 0
@pytest.mark.skip(reason="issues/6234")
def test_duplicate_parse(self, get_http_api_auth, tmp_path):