Test: Update test cases for PR #6194 #6259 #6376 (#6444)

### What problem does this PR solve?

Updates test cases to track the behavior changes introduced in PR #6194, #6259, and #6376.

### Type of change

- [x] Update test cases
liu an 2025-03-24 12:01:33 +08:00 committed by GitHub
parent 60afb63d44
commit e4c8d703b5
5 changed files with 68 additions and 152 deletions

View File

@@ -174,8 +174,6 @@ def stop_parse_documnet(auth, dataset_id, payload=None):
 # CHUNK MANAGEMENT WITHIN DATASET
 def add_chunk(auth, dataset_id, document_id, payload=None):
-    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(
-        dataset_id=dataset_id, document_id=document_id
-    )
+    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
     res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
     return res.json()
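The only change to `add_chunk` is joining the wrapped call into one line, but its two-stage string formatting is easy to misread, so here is a minimal runnable sketch. The values of `HOST_ADDRESS`, `CHUNK_API_URL`, and `HEADERS` below are placeholders, not the suite's real configuration.

```python
import requests

# Placeholder values; the real ones live in the test suite's common module.
HOST_ADDRESS = "http://127.0.0.1:9380"
CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
HEADERS = {"Content-Type": "application/json"}


def add_chunk(auth, dataset_id, document_id, payload=None):
    # Stage 1: the f-string splices HOST_ADDRESS and CHUNK_API_URL together;
    # the {dataset_id}/{document_id} placeholders inside CHUNK_API_URL's value
    # are untouched by the f-string. Stage 2: .format() fills them in.
    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()
```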

View File

@@ -38,9 +38,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         res = delete_dataset(auth, {"ids": ids})
         assert res["code"] == expected_code
@@ -73,9 +71,7 @@ class TestDatasetDeletion:
             (lambda r: {"ids": r}, 0, "", 0),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, payload, expected_code, expected_message, remaining
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, payload, expected_code, expected_message, remaining):
         ids = create_datasets(get_http_api_auth, 3)
         if callable(payload):
             payload = payload(ids)
@@ -120,7 +116,8 @@ class TestDatasetDeletion:
         ids = create_datasets(get_http_api_auth, 1)
         res = delete_dataset(get_http_api_auth, {"ids": ids + ids})
         assert res["code"] == 0
-        #assert res["data"]["success_count"] == 1
+        assert res["data"]["errors"][0] == f"Duplicate dataset ids: {ids[0]}"
+        assert res["data"]["success_count"] == 1
         res = list_dataset(get_http_api_auth)
         assert len(res["data"]) == 0
@@ -129,12 +126,7 @@ class TestDatasetDeletion:
         ids = create_datasets(get_http_api_auth, 100)
         with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [
-                executor.submit(
-                    delete_dataset, get_http_api_auth, {"ids": ids[i : i + 1]}
-                )
-                for i in range(100)
-            ]
+            futures = [executor.submit(delete_dataset, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
         responses = [f.result() for f in futures]
         assert all(r["code"] == 0 for r in responses)
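The rewritten duplicate-deletion test encodes the response contract these PRs introduce: submitting the same dataset id twice still returns code 0, but the envelope now carries a per-id error list alongside a success count. A sketch of the shape the assertions imply, inferred from the test rather than from documented API behavior:

```python
# Hypothetical response for delete_dataset(auth, {"ids": [dup_id, dup_id]});
# the id value is illustrative.
res = {
    "code": 0,
    "data": {
        "errors": ["Duplicate dataset ids: d1a2b3"],  # one entry per duplicate
        "success_count": 1,                           # the id was deleted once
    },
}
assert res["code"] == 0
assert res["data"]["errors"][0].startswith("Duplicate dataset ids:")
assert res["data"]["success_count"] == 1
```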

View File

@@ -38,9 +38,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, tmp_path, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, tmp_path, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
         res = delete_documnet(auth, ids[0], {"ids": document_ids[0]})
@@ -54,11 +52,11 @@ class TestDocumentDeletion:
         [
             (None, 0, "", 0),
             ({"ids": []}, 0, "", 0),
-            ({"ids": ["invalid_id"]}, 102, "Document not found!", 3),
+            ({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
             (
                 {"ids": ["\n!?。;!?\"'"]},
                 102,
-                "Document not found!",
+                """Documents not found: [\'\\n!?。;!?"\\\'\']""",
                 3,
             ),
             (
@@ -86,8 +84,8 @@ class TestDocumentDeletion:
             payload = payload(document_ids)
         res = delete_documnet(get_http_api_auth, ids[0], payload)
         assert res["code"] == expected_code
-        #if res["code"] != 0:
-        #    assert res["message"] == expected_message
+        if res["code"] != 0:
+            assert res["message"] == expected_message
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == remaining
@@ -104,16 +102,14 @@ class TestDocumentDeletion:
             ),
         ],
     )
-    def test_invalid_dataset_id(
-        self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message
-    ):
+    def test_invalid_dataset_id(self, get_http_api_auth, tmp_path, dataset_id, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 3, tmp_path)
         res = delete_documnet(get_http_api_auth, dataset_id, {"ids": document_ids[:1]})
         assert res["code"] == expected_code
         assert res["message"] == expected_message

-    @pytest.mark.xfail(reason="issues/6174")
+    # @pytest.mark.xfail(reason="issues/6174")
     @pytest.mark.parametrize(
         "payload",
         [
@@ -128,9 +124,8 @@ class TestDocumentDeletion:
         if callable(payload):
             payload = payload(document_ids)
         res = delete_documnet(get_http_api_auth, ids[0], payload)
-        assert res["code"] == 0
-        assert res["data"]["errors"][0] == "You don't own the dataset invalid_id"
-        assert res["data"]["success_count"] == 3
+        assert res["code"] == 102
+        assert res["message"] == "Documents not found: ['invalid_id']"
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == 0
@@ -143,17 +138,16 @@ class TestDocumentDeletion:
         assert res["code"] == 0

         res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids})
-        assert res["code"] in [102, 500]
-        #assert res["message"] == "Document not found!"
+        assert res["code"] == 102
+        assert res["message"] == f"Documents not found: {document_ids}"

-    @pytest.mark.xfail(reason="issues/6234")
     def test_duplicate_deletion(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = delete_documnet(
-            get_http_api_auth, ids[0], {"ids": document_ids + document_ids}
-        )
+        res = delete_documnet(get_http_api_auth, ids[0], {"ids": document_ids + document_ids})
         assert res["code"] == 0
+        assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}"
+        assert res["data"]["success_count"] == 1
         res = list_documnet(get_http_api_auth, ids[0])
         assert len(res["data"]["docs"]) == 0
@@ -162,9 +156,7 @@ class TestDocumentDeletion:
     def test_concurrent_deletion(self, get_http_api_auth, tmp_path):
         documnets_num = 100
         ids = create_datasets(get_http_api_auth, 1)
-        document_ids = batch_upload_documents(
-            get_http_api_auth, ids[0], documnets_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path)

         with ThreadPoolExecutor(max_workers=5) as executor:
             futures = [
@@ -183,9 +175,7 @@ class TestDocumentDeletion:
     def test_delete_1k(self, get_http_api_auth, tmp_path):
         documnets_num = 1_000
         ids = create_datasets(get_http_api_auth, 1)
-        document_ids = batch_upload_documents(
-            get_http_api_auth, ids[0], documnets_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, ids[0], documnets_num, tmp_path)
         res = list_documnet(get_http_api_auth, ids[0])
         assert res["data"]["total"] == documnets_num

View File

@@ -50,9 +50,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, tmp_path, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, tmp_path, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
         res = parse_documnet(auth, ids[0], {"document_ids": document_ids[0]})
@@ -68,32 +66,30 @@ class TestDocumentsParse:
                 None,
                 102,
                 """AttributeError("\'NoneType\' object has no attribute \'get\'")""",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             ({"document_ids": []}, 102, "`document_ids` is required"),
             (
                 {"document_ids": ["invalid_id"]},
                 102,
-                "You don't own the document invalid_id.",
+                "Documents not found: ['invalid_id']",
             ),
             (
                 {"document_ids": ["\n!?。;!?\"'"]},
                 102,
-                """You don\'t own the document \n!?。;!?"\'.""",
+                """Documents not found: [\'\\n!?。;!?"\\\'\']""",
             ),
             pytest.param(
                 "not json",
                 102,
                 "AttributeError(\"'str' object has no attribute 'get'\")",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             (lambda r: {"document_ids": r[:1]}, 0, ""),
             (lambda r: {"document_ids": r}, 0, ""),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, tmp_path, payload, expected_code, expected_message
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id, _document_ids):
             for _document_id in _document_ids:
@@ -104,20 +100,16 @@ class TestDocumentsParse:
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         if callable(payload):
             payload = payload(document_ids)
         res = parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == expected_code
-        #if expected_code != 0:
-        #    assert res["message"] == expected_message
+        if expected_code != 0:
+            assert res["message"] == expected_message
         if expected_code == 0:
             condition(get_http_api_auth, dataset_id, payload["document_ids"])
-            validate_document_details(
-                get_http_api_auth, dataset_id, payload["document_ids"]
-            )
+            validate_document_details(get_http_api_auth, dataset_id, payload["document_ids"])

     @pytest.mark.parametrize(
         "dataset_id, expected_code, expected_message",
@@ -140,13 +132,10 @@ class TestDocumentsParse:
     ):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == expected_code
         assert res["message"] == expected_message

-    @pytest.mark.skip(reason="issues/6229")
     @pytest.mark.parametrize(
         "payload",
         [
@@ -155,9 +144,7 @@ class TestDocumentsParse:
             lambda r: {"document_ids": r + ["invalid_id"]},
         ],
     )
-    def test_parse_partial_invalid_document_id(
-        self, get_http_api_auth, tmp_path, payload
-    ):
+    def test_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id):
             res = list_documnet(_auth, _dataset_id)
@@ -168,14 +155,12 @@ class TestDocumentsParse:
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         if callable(payload):
             payload = payload(document_ids)
         res = parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == 102
-        assert res["message"] == "You don't own the document invalid_id."
+        assert res["message"] == "Documents not found: ['invalid_id']"

         condition(get_http_api_auth, dataset_id)
@@ -192,22 +177,15 @@ class TestDocumentsParse:
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
         condition(get_http_api_auth, dataset_id)
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0

-    @pytest.mark.skip(reason="issues/6234")
     def test_duplicate_parse(self, get_http_api_auth, tmp_path):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id):
@@ -219,13 +197,11 @@ class TestDocumentsParse:
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
         assert res["code"] == 0
+        assert res["data"]["errors"][0] == f"Duplicate document ids: {document_ids[0]}"
+        assert res["data"]["success_count"] == 1
         condition(get_http_api_auth, dataset_id)
@@ -244,12 +220,8 @@ class TestDocumentsParse:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
-        res = parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
+        res = parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
         condition(get_http_api_auth, dataset_id, document_num)
@@ -269,9 +241,7 @@ class TestDocumentsParse:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)

         with ThreadPoolExecutor(max_workers=5) as executor:
             futures = [
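A pattern worth noting across this file: the malformed-payload cases that previously carried `pytest.mark.xfail` now carry `pytest.mark.skip`, and stale `skip(reason=...)` marks on since-fixed tests are dropped. The distinction matters for CI signal; a small contrast (the test names and reasons here are illustrative, not from the suite):

```python
import pytest


# xfail: the test still runs; a failure is recorded as XFAIL and an
# unexpected pass as XPASS, so you notice when the bug gets fixed.
@pytest.mark.xfail(reason="server currently errors on a None payload")
def test_none_payload_xfail():
    raise AssertionError("known-bad behavior")


# skip: the test is never executed, the right call when running it is
# meaningless until the intended behavior is settled.
@pytest.mark.skip(reason="behavior under discussion upstream")
def test_none_payload_skip():
    raise AssertionError("never reached")
```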

View File

@@ -60,9 +60,7 @@ class TestAuthorization:
             ),
         ],
     )
-    def test_invalid_auth(
-        self, get_http_api_auth, auth, expected_code, expected_message
-    ):
+    def test_invalid_auth(self, get_http_api_auth, auth, expected_code, expected_message):
         ids = create_datasets(get_http_api_auth, 1)
         res = stop_parse_documnet(auth, ids[0])
         assert res["code"] == expected_code
@@ -78,7 +76,7 @@ class TestDocumentsParseStop:
                 None,
                 102,
                 """AttributeError("\'NoneType\' object has no attribute \'get\'")""",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             ({"document_ids": []}, 102, "`document_ids` is required"),
             (
@@ -95,15 +93,13 @@ class TestDocumentsParseStop:
                 "not json",
                 102,
                 "AttributeError(\"'str' object has no attribute 'get'\")",
-                marks=pytest.mark.xfail,
+                marks=pytest.mark.skip,
             ),
             (lambda r: {"document_ids": r[:1]}, 0, ""),
             (lambda r: {"document_ids": r}, 0, ""),
         ],
     )
-    def test_basic_scenarios(
-        self, get_http_api_auth, tmp_path, payload, expected_code, expected_message
-    ):
+    def test_basic_scenarios(self, get_http_api_auth, tmp_path, payload, expected_code, expected_message):
         @wait_for(10, 1, "Document parsing timeout")
         def condition(_auth, _dataset_id, _document_ids):
             for _document_id in _document_ids:
@@ -114,9 +110,7 @@ class TestDocumentsParseStop:
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})

         if callable(payload):
@@ -127,16 +121,10 @@ class TestDocumentsParseStop:
         if expected_code != 0:
             assert res["message"] == expected_message
         else:
-            completed_document_ids = list(
-                set(document_ids) - set(payload["document_ids"])
-            )
+            completed_document_ids = list(set(document_ids) - set(payload["document_ids"]))
             condition(get_http_api_auth, dataset_id, completed_document_ids)
-            validate_document_parse_cancel(
-                get_http_api_auth, dataset_id, payload["document_ids"]
-            )
-            validate_document_parse_done(
-                get_http_api_auth, dataset_id, completed_document_ids
-            )
+            validate_document_parse_cancel(get_http_api_auth, dataset_id, payload["document_ids"])
+            validate_document_parse_done(get_http_api_auth, dataset_id, completed_document_ids)

     @pytest.mark.parametrize(
         "dataset_id, expected_code, expected_message",
@@ -159,13 +147,11 @@ class TestDocumentsParseStop:
     ):
         ids = create_datasets(get_http_api_auth, 1)
         document_ids = batch_upload_documents(get_http_api_auth, ids[0], 1, tmp_path)
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == expected_code
         assert res["message"] == expected_message

-    @pytest.mark.xfail
+    @pytest.mark.skip
     @pytest.mark.parametrize(
         "payload",
         [
@@ -174,54 +160,40 @@ class TestDocumentsParseStop:
             lambda r: {"document_ids": r + ["invalid_id"]},
         ],
     )
-    def test_stop_parse_partial_invalid_document_id(
-        self, get_http_api_auth, tmp_path, payload
-    ):
+    def test_stop_parse_partial_invalid_document_id(self, get_http_api_auth, tmp_path, payload):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 3, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 3, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         if callable(payload):
             payload = payload(document_ids)
         res = stop_parse_documnet(get_http_api_auth, dataset_id, payload)
         assert res["code"] == 102
-        assert res["message"] == "You don't own the document invalid_id."
         validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)

     def test_repeated_stop_parse(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 102
         assert res["message"] == "Can't stop parsing document with progress at 0 or 1"

-    @pytest.mark.xfail
     def test_duplicate_stop_parse(self, get_http_api_auth, tmp_path):
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, 1, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, 1, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
         assert res["code"] == 0
-        assert res["success_count"] == 1
+        assert res["data"]["success_count"] == 1
         assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"]

     @pytest.mark.slow
@@ -229,13 +201,9 @@ class TestDocumentsParseStop:
         document_num = 100
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
-        res = stop_parse_documnet(
-            get_http_api_auth, dataset_id, {"document_ids": document_ids}
-        )
+        res = stop_parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         assert res["code"] == 0
         validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)
@@ -244,9 +212,7 @@ class TestDocumentsParseStop:
         document_num = 50
         ids = create_datasets(get_http_api_auth, 1)
         dataset_id = ids[0]
-        document_ids = batch_upload_documents(
-            get_http_api_auth, dataset_id, document_num, tmp_path
-        )
+        document_ids = batch_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
         parse_documnet(get_http_api_auth, dataset_id, {"document_ids": document_ids})
         with ThreadPoolExecutor(max_workers=5) as executor:
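Finally, the small-looking fix in `test_duplicate_stop_parse` (`res["success_count"]` → `res["data"]["success_count"]`) reflects the envelope this API wraps around responses: status fields at the top level, result details under `data`. A sketch, with the shape inferred from these tests rather than from documented API behavior:

```python
# Hypothetical response to a stop-parse call containing duplicate ids;
# the document id is illustrative.
res = {
    "code": 0,
    "data": {
        "success_count": 1,
        "errors": ["Duplicate document ids: doc_123"],
    },
}

# New assertion path: result details live under "data".
assert res["data"]["success_count"] == 1
# The old assertion, res["success_count"] == 1, would raise KeyError here.
```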