TEST: Added test cases for Create Dataset HTTP API (#5724)
### What problem does this PR solve?

1. Add test cases for the Create Dataset HTTP API.
2. Integrate workflows/tests.yml into the CI pipeline.

### Type of change

- [x] Add test cases
This commit is contained in:
Parent: 27153dde85
Commit: 4f9504305a
19 .github/workflows/tests.yml (vendored)
@@ -98,6 +98,15 @@ jobs:
          done
          cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py

      - name: Run http api tests against Elasticsearch
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          export HOST_ADDRESS=http://host.docker.internal:9380
          until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow"

      - name: Stop ragflow:nightly
        if: always()  # always run this step even if previous steps failed

@@ -128,6 +137,16 @@ jobs:
          done
          cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py

      - name: Run http api tests against Infinity
        run: |
          export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
          export HOST_ADDRESS=http://host.docker.internal:9380
          until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do
            echo "Waiting for service to be available..."
            sleep 5
          done
          cd sdk/python && uv sync --python 3.10 --frozen && uv pip install . && source .venv/bin/activate && cd test/test_http_api && pytest -s --tb=short -m "not slow"

      - name: Stop ragflow:nightly
        if: always()  # always run this step even if previous steps failed
        run: |
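For illustration only (not part of this diff): the readiness check in both new steps simply polls `HOST_ADDRESS` until the server answers. A rough Python equivalent of that `until ... curl ...` loop, assuming the same environment-variable convention:

```python
import os
import time

import requests

# Same convention as the workflow: HOST_ADDRESS points at the RAGFlow API.
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://host.docker.internal:9380")

# Poll until the server responds, mirroring the curl loop above.
while True:
    try:
        requests.get(HOST_ADDRESS, timeout=5)
        break
    except requests.exceptions.RequestException:
        print("Waiting for service to be available...")
        time.sleep(5)
```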
@@ -18,3 +18,8 @@ dependencies = [
test = [
    "pytest>=8.0.0,<9.0.0"
]

[tool.pytest.ini_options]
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
]
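For illustration only (not part of this diff): a minimal sketch of how the `slow` marker registered above is applied to a test and then deselected with `-m "not slow"`, as the CI workflow does. The test names here are hypothetical.

```python
import time

import pytest


@pytest.mark.slow
def test_bulk_dataset_creation():
    # Deselected when pytest is invoked with -m "not slow", as in the workflow above.
    time.sleep(1)


def test_fast_path():
    # Carries no marker, so it always runs.
    assert True
```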
@@ -18,6 +18,8 @@ import os
import pytest
import requests

from libs.auth import RAGFlowHttpApiAuth

HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')


@@ -85,3 +87,8 @@ def get_auth():
@pytest.fixture(scope="session")
def get_email():
    return EMAIL


@pytest.fixture(scope="session")
def get_http_api_auth(get_api_key_fixture):
    return RAGFlowHttpApiAuth(get_api_key_fixture)
29 sdk/python/test/data/logo.svg (new file)
@@ -0,0 +1,29 @@
<svg width="32" height="34" viewBox="0 0 32 34" fill="none" xmlns="http://www.w3.org/2000/svg">
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M3.43265 20.7677C4.15835 21.5062 4.15834 22.7035 3.43262 23.4419L3.39546 23.4797C2.66974 24.2182 1.49312 24.2182 0.767417 23.4797C0.0417107 22.7412 0.0417219 21.544 0.767442 20.8055L0.804608 20.7677C1.53033 20.0292 2.70694 20.0293 3.43265 20.7677Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M12.1689 21.3375C12.8933 22.0773 12.8912 23.2746 12.1641 24.0117L7.01662 29.2307C6.2896 29.9678 5.11299 29.9657 4.38859 29.2259C3.66419 28.4861 3.66632 27.2888 4.39334 26.5517L9.54085 21.3327C10.2679 20.5956 11.4445 20.5977 12.1689 21.3375Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M19.1551 30.3217C19.7244 29.4528 20.8781 29.218 21.7321 29.7973L21.8436 29.8729C22.6975 30.4522 22.9283 31.6262 22.359 32.4952C21.7897 33.3641 20.6359 33.5989 19.782 33.0196L19.6705 32.944C18.8165 32.3647 18.5858 31.1907 19.1551 30.3217Z"
        fill="#B2DDFF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4184 20.6544C32.1441 21.3929 32.1441 22.5902 31.4184 23.3286L28.8911 25.9003C28.1654 26.6388 26.9887 26.6388 26.263 25.9003C25.5373 25.1619 25.5373 23.9646 26.263 23.2261L28.7903 20.6544C29.516 19.916 30.6927 19.916 31.4184 20.6544Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M31.4557 11.1427C32.1814 11.8812 32.1814 13.0785 31.4557 13.8169L12.7797 32.8209C12.054 33.5594 10.8774 33.5594 10.1517 32.8209C9.42599 32.0825 9.42599 30.8852 10.1517 30.1467L28.8277 11.1427C29.5534 10.4043 30.73 10.4043 31.4557 11.1427Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M27.925 5.29994C28.6508 6.0384 28.6508 7.23568 27.925 7.97414L17.184 18.9038C16.4583 19.6423 15.2817 19.6423 14.556 18.9038C13.8303 18.1653 13.8303 16.9681 14.556 16.2296L25.297 5.29994C26.0227 4.56148 27.1993 4.56148 27.925 5.29994Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M22.256 1.59299C22.9822 2.33095 22.983 3.52823 22.2578 4.26718L8.45055 18.3358C7.72533 19.0748 6.54871 19.0756 5.82251 18.3376C5.09631 17.5996 5.09552 16.4024 5.82075 15.6634L19.6279 1.59478C20.3532 0.855827 21.5298 0.855022 22.256 1.59299Z"
        fill="#1570EF" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M8.58225 6.09619C9.30671 6.83592 9.30469 8.0332 8.57772 8.77038L3.17006 14.2541C2.4431 14.9913 1.26649 14.9893 0.542025 14.2495C-0.182438 13.5098 -0.180413 12.3125 0.546548 11.5753L5.95421 6.09159C6.68117 5.3544 7.85778 5.35646 8.58225 6.09619Z"
        fill="#53B1FD" />
    <path fill-rule="evenodd" clip-rule="evenodd"
        d="M11.893 0.624023C12.9193 0.624023 13.7513 1.47063 13.7513 2.51497V2.70406C13.7513 3.7484 12.9193 4.59501 11.893 4.59501C10.8667 4.59501 10.0347 3.7484 10.0347 2.70406V2.51497C10.0347 1.47063 10.8667 0.624023 11.893 0.624023Z"
        fill="#B2DDFF" />
</svg>
25 sdk/python/test/libs/auth.py (new file)
@@ -0,0 +1,25 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from requests.auth import AuthBase


class RAGFlowHttpApiAuth(AuthBase):
    def __init__(self, token):
        self._token = token

    def __call__(self, r):
        r.headers["Authorization"] = f'Bearer {self._token}'
        return r
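For illustration only (not part of this diff): a minimal sketch of how this auth class plugs into requests. The token value and host below are placeholders; in the test suite the real key comes from the `get_api_key_fixture` fixture.

```python
import requests

from libs.auth import RAGFlowHttpApiAuth

# Placeholder token, not a real key.
auth = RAGFlowHttpApiAuth("ragflow-xxxxxxxx")

# The auth object injects "Authorization: Bearer <token>" into every request it signs.
res = requests.get("http://127.0.0.1:9380/api/v1/datasets", auth=auth)
print(res.json())
```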
@@ -0,0 +1,47 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import requests

HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
API_URL = f'{HOST_ADDRESS}/api/v1/datasets'
HEADERS = {"Content-Type": "application/json"}


INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128


def create_dataset(auth, payload):
    res = requests.post(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_dataset(auth, params):
    res = requests.get(url=API_URL, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_dataset(auth, dataset_id, payload):
    res = requests.put(url=f"{API_URL}/{dataset_id}",
                       headers=HEADERS, auth=auth, json=payload)
    return res.json()


def delete_dataset(auth, payload=None):
    res = requests.delete(url=API_URL, headers=HEADERS, auth=auth, json=payload)
    return res.json()
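For illustration only (not part of this diff): a sketch that chains the helpers above against a running instance. The API key is a placeholder and the list parameters are illustrative.

```python
from common import create_dataset, delete_dataset, list_dataset
from libs.auth import RAGFlowHttpApiAuth

auth = RAGFlowHttpApiAuth("ragflow-xxxxxxxx")  # placeholder key

# Create a dataset and check the API-level status code in the JSON body.
created = create_dataset(auth, {"name": "demo_dataset"})
assert created["code"] == 0

# List datasets; the parameter names here are illustrative.
listed = list_dataset(auth, {"page": 1, "page_size": 10})
print(listed)

# With no payload the helper issues a bare DELETE, which the test fixtures
# below use to clear datasets between tests.
delete_dataset(auth)
```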
@@ -0,0 +1,25 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest

from common import delete_dataset


@pytest.fixture(scope="function", autouse=True)
def clear_datasets(get_http_api_auth):
    yield
    delete_dataset(get_http_api_auth)
@@ -0,0 +1,256 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
from pathlib import Path

import pytest

from common import create_dataset, INVALID_API_TOKEN, DATASET_NAME_LIMIT
from libs.auth import RAGFlowHttpApiAuth


class TestAuthorization:
    def test_invalid_auth(self):
        INVALID_API_KEY = RAGFlowHttpApiAuth(INVALID_API_TOKEN)
        res = create_dataset(INVALID_API_KEY, {"name": "auth_test"})

        assert res["code"] == 109
        assert res["message"] == 'Authentication error: API key is invalid!'


class TestDatasetCreation:
    @pytest.mark.parametrize("payload, expected_code", [
        ({"name": "valid_name"}, 0),
        ({"name": "a" * (DATASET_NAME_LIMIT + 1)}, 102),
        ({"name": 0}, 100),
        ({"name": ""}, 102),
        ({"name": "duplicated_name"}, 102),
        ({"name": "case_insensitive"}, 102),
    ])
    def test_basic_scenarios(self, get_http_api_auth, payload, expected_code):
        if payload["name"] == "duplicated_name":
            create_dataset(get_http_api_auth, payload)
        elif payload["name"] == "case_insensitive":
            create_dataset(get_http_api_auth, {
                "name": payload["name"].upper()})

        res = create_dataset(get_http_api_auth, payload)

        assert res["code"] == expected_code
        if expected_code == 0:
            assert res["data"]["name"] == payload["name"]

        if payload["name"] in ["duplicated_name", "case_insensitive"]:
            assert res["message"] == "Duplicated dataset name in creating dataset."

    @pytest.mark.slow
    def test_dataset_10k(self, get_http_api_auth):
        for i in range(10000):
            payload = {"name": f"dataset_{i}"}
            res = create_dataset(get_http_api_auth, payload)
            assert res["code"] == 0, f"Failed to create dataset {i}"


class TestAdvancedConfigurations:
    def test_avatar(self, get_http_api_auth, request):
        def encode_avatar(image_path):
            with Path.open(image_path, "rb") as file:
                binary_data = file.read()
            base64_encoded = base64.b64encode(binary_data).decode("utf-8")
            return base64_encoded

        payload = {
            "name": "avatar_test",
            "avatar": encode_avatar(Path(request.config.rootdir) / 'test/data/logo.svg')
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0

    def test_description(self, get_http_api_auth):
        payload = {
            "name": "description_test",
            "description": "a" * 65536
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0

    @pytest.mark.parametrize("name, permission, expected_code", [
        ("me", "me", 0),
        ("team", "team", 0),
        pytest.param("empty_permission", "", 0,
                     marks=pytest.mark.xfail(reason='issue#5709')),
        ("me_uppercase", "ME", 102),
        ("team_uppercase", "TEAM", 102),
        ("other_permission", "other_permission", 102)
    ])
    def test_permission(self, get_http_api_auth, name, permission, expected_code):
        payload = {
            "name": name,
            "permission": permission
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0 and permission != "":
            assert res["data"]["permission"] == permission
        if permission == "":
            assert res["data"]["permission"] == "me"

    @pytest.mark.parametrize("name, chunk_method, expected_code", [
        ("naive", "naive", 0),
        ("manual", "manual", 0),
        ("qa", "qa", 0),
        ("table", "table", 0),
        ("paper", "paper", 0),
        ("book", "book", 0),
        ("laws", "laws", 0),
        ("presentation", "presentation", 0),
        ("picture", "picture", 0),
        ("one", "one", 0),
        ("knowledge_graph", "knowledge_graph", 0),
        ("email", "email", 0),
        ("tag", "tag", 0),
        pytest.param("empty_chunk_method", "", 0,
                     marks=pytest.mark.xfail(reason='issue#5709')),
        ("other_chunk_method", "other_chunk_method", 102)
    ])
    def test_chunk_method(self, get_http_api_auth, name, chunk_method, expected_code):
        payload = {
            "name": name,
            "chunk_method": chunk_method
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0 and chunk_method != "":
            assert res["data"]["chunk_method"] == chunk_method
        if chunk_method == "":
            assert res["data"]["chunk_method"] == "naive"

    @pytest.mark.parametrize("name, embedding_model, expected_code", [
        ("BAAI/bge-large-zh-v1.5", "BAAI/bge-large-zh-v1.5", 0),
        ("BAAI/bge-base-en-v1.5", "BAAI/bge-base-en-v1.5", 0),
        ("BAAI/bge-large-en-v1.5", "BAAI/bge-large-en-v1.5", 0),
        ("BAAI/bge-small-en-v1.5", "BAAI/bge-small-en-v1.5", 0),
        ("BAAI/bge-small-zh-v1.5", "BAAI/bge-small-zh-v1.5", 0),
        ("jinaai/jina-embeddings-v2-base-en", "jinaai/jina-embeddings-v2-base-en", 0),
        ("jinaai/jina-embeddings-v2-small-en", "jinaai/jina-embeddings-v2-small-en", 0),
        ("nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-text-v1.5", 0),
        ("sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-MiniLM-L6-v2", 0),
        ("text-embedding-v2", "text-embedding-v2", 0),
        ("text-embedding-v3", "text-embedding-v3", 0),
        ("maidalun1020/bce-embedding-base_v1", "maidalun1020/bce-embedding-base_v1", 0),
        ("other_embedding_model", "other_embedding_model", 102)
    ])
    def test_embedding_model(self, get_http_api_auth, name, embedding_model, expected_code):
        payload = {
            "name": name,
            "embedding_model": embedding_model
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert res["data"]["embedding_model"] == embedding_model

    @pytest.mark.parametrize("name, chunk_method, parser_config, expected_code", [
        ("naive_default", "naive",
         {"chunk_token_count": 128,
          "layout_recognize": "DeepDOC",
          "html4excel": False,
          "delimiter": "\n!?。;!?",
          "task_page_size": 12,
          "raptor": {"use_raptor": False}},
         0),
        ("naive_empty", "naive", {}, 0),
        pytest.param("naive_chunk_token_count_negative", "naive",
                     {"chunk_token_count": -1},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_chunk_token_count_zero", "naive",
                     {"chunk_token_count": 0},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_chunk_token_count_float", "naive",
                     {"chunk_token_count": 3.14},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_chunk_token_count_max", "naive",
                     {"chunk_token_count": 1024 * 1024 * 1024},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_chunk_token_count_str", "naive",
                     {"chunk_token_count": '1024'},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        ("naive_layout_recognize_DeepDOC", "naive",
         {"layout_recognize": "DeepDOC"}, 0),
        ("naive_layout_recognize_Naive", "naive",
         {"layout_recognize": "Naive"}, 0),
        ("naive_html4excel_true", "naive", {"html4excel": True}, 0),
        ("naive_html4excel_false", "naive", {"html4excel": False}, 0),
        pytest.param("naive_html4excel_not_bool", "naive",
                     {"html4excel": 1},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        ("naive_delimiter_empty", "naive", {"delimiter": ""}, 0),
        ("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0),
        pytest.param("naive_delimiter_not_str", "naive",
                     {"delimiter": 1},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_task_page_size_negative", "naive",
                     {"task_page_size": -1},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_task_page_size_zero", "naive",
                     {"task_page_size": 0},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_task_page_size_float", "naive",
                     {"task_page_size": 3.14},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_task_page_size_max", "naive",
                     {"task_page_size": 1024 * 1024 * 1024},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        pytest.param("naive_task_page_size_str", "naive",
                     {"task_page_size": '1024'},
                     102, marks=pytest.mark.xfail(reason='issue#5719')),
        ("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0),
        ("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0),
        ("knowledge_graph_entity_types_default", "knowledge_graph",
         {"entity_types": ["organization", "person", "location", "event", "time"]}, 0),
        pytest.param("knowledge_graph_entity_types_not_list", "knowledge_graph",
                     {"entity_types": "organization,person,location,event,time"},
                     102, marks=pytest.mark.xfail(reason='issue#5719'))
    ])
    def test_parser_configs(self, get_http_api_auth, name, chunk_method, parser_config, expected_code):
        payload = {
            "name": name,
            "chunk_method": chunk_method,
            "parser_config": parser_config
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0 and parser_config != {}:
            for k, v in parser_config.items():
                assert res["data"]["parser_config"][k] == v
        if parser_config == {}:
            assert res["data"]["parser_config"] == {"chunk_token_num": 128,
                                                    "delimiter": "\\n!?;。;!?",
                                                    "html4excel": False,
                                                    "layout_recognize": "DeepDOC",
                                                    "raptor": {"use_raptor": False}}
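For illustration only (not part of this diff): a hypothetical local runner that mirrors the CI invocation `pytest -s --tb=short -m "not slow"` against a locally running server. The host value is a placeholder for wherever your RAGFlow instance listens.

```python
import os
import sys

import pytest

if __name__ == "__main__":
    # Point the suite at a running RAGFlow instance (9380 is the port used throughout this PR).
    os.environ.setdefault("HOST_ADDRESS", "http://127.0.0.1:9380")
    # Deselect tests marked "slow", matching the CI workflow invocation.
    sys.exit(pytest.main(["-s", "--tb=short", "-m", "not slow"]))
```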