mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-20 13:10:05 +08:00
### What problem does this PR solve?

Fix #5719: add data type validation for `parser_config`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
7eb417b24f
commit
d16033dd2c
@ -399,9 +399,15 @@ def valid_parser_config(parser_config):
|
|||||||
for k in parser_config.keys():
|
for k in parser_config.keys():
|
||||||
assert k in scopes, f"Abnormal 'parser_config'. Invalid key: {k}"
|
assert k in scopes, f"Abnormal 'parser_config'. Invalid key: {k}"
|
||||||
|
|
||||||
|
assert isinstance(parser_config.get("chunk_token_num", 1), int), "chunk_token_num should be int"
|
||||||
assert 1 <= parser_config.get("chunk_token_num", 1) < 100000000, "chunk_token_num should be in range from 1 to 100000000"
|
assert 1 <= parser_config.get("chunk_token_num", 1) < 100000000, "chunk_token_num should be in range from 1 to 100000000"
|
||||||
|
assert isinstance(parser_config.get("task_page_size", 1), int), "task_page_size should be int"
|
||||||
assert 1 <= parser_config.get("task_page_size", 1) < 100000000, "task_page_size should be in range from 1 to 100000000"
|
assert 1 <= parser_config.get("task_page_size", 1) < 100000000, "task_page_size should be in range from 1 to 100000000"
|
||||||
|
assert isinstance(parser_config.get("auto_keywords", 1), int), "auto_keywords should be int"
|
||||||
assert 0 <= parser_config.get("auto_keywords", 0) < 32, "auto_keywords should be in range from 0 to 32"
|
assert 0 <= parser_config.get("auto_keywords", 0) < 32, "auto_keywords should be in range from 0 to 32"
|
||||||
|
assert isinstance(parser_config.get("auto_questions", 1), int), "auto_questions should be int"
|
||||||
assert 0 <= parser_config.get("auto_questions", 0) < 10, "auto_questions should be in range from 0 to 10"
|
assert 0 <= parser_config.get("auto_questions", 0) < 10, "auto_questions should be in range from 0 to 10"
|
||||||
|
assert isinstance(parser_config.get("topn_tags", 1), int), "topn_tags should be int"
|
||||||
assert 0 <= parser_config.get("topn_tags", 0) < 10, "topn_tags should be in range from 0 to 10"
|
assert 0 <= parser_config.get("topn_tags", 0) < 10, "topn_tags should be in range from 0 to 10"
|
||||||
assert isinstance(parser_config.get("html4excel", False), bool), "html4excel should be True or False"
|
assert isinstance(parser_config.get("html4excel", False), bool), "html4excel should be True or False"
|
||||||
|
assert isinstance(parser_config.get("delimiter", ""), str), "delimiter should be str"
|
||||||
|
@ -220,21 +220,19 @@ class TestAdvancedConfigurations:
|
|||||||
100,
|
100,
|
||||||
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
|
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_chunk_token_num_float",
|
"naive_chunk_token_num_float",
|
||||||
"naive",
|
"naive",
|
||||||
{"chunk_token_num": 3.14},
|
{"chunk_token_num": 3.14},
|
||||||
102,
|
100,
|
||||||
"",
|
"AssertionError('chunk_token_num should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_chunk_token_num_str",
|
"naive_chunk_token_num_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"chunk_token_num": "1024"},
|
{"chunk_token_num": "1024"},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('chunk_token_num should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"naive_layout_recognize_DeepDOC",
|
"naive_layout_recognize_DeepDOC",
|
||||||
@ -261,13 +259,12 @@ class TestAdvancedConfigurations:
|
|||||||
),
|
),
|
||||||
("naive_delimiter_empty", "naive", {"delimiter": ""}, 0, ""),
|
("naive_delimiter_empty", "naive", {"delimiter": ""}, 0, ""),
|
||||||
("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0, ""),
|
("naive_delimiter_backticks", "naive", {"delimiter": "`##`"}, 0, ""),
|
||||||
pytest.param(
|
(
|
||||||
"naive_delimiter_not_str",
|
"naive_delimiter_not_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"delimiter": 1},
|
{"delimiter": 1},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('delimiter should be str')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"naive_task_page_size_negative",
|
"naive_task_page_size_negative",
|
||||||
@ -290,21 +287,19 @@ class TestAdvancedConfigurations:
|
|||||||
100,
|
100,
|
||||||
"AssertionError('task_page_size should be in range from 1 to 100000000')",
|
"AssertionError('task_page_size should be in range from 1 to 100000000')",
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_task_page_size_float",
|
"naive_task_page_size_float",
|
||||||
"naive",
|
"naive",
|
||||||
{"task_page_size": 3.14},
|
{"task_page_size": 3.14},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('task_page_size should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_task_page_size_str",
|
"naive_task_page_size_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"task_page_size": "1024"},
|
{"task_page_size": "1024"},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('task_page_size should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0, ""),
|
("naive_raptor_true", "naive", {"raptor": {"use_raptor": True}}, 0, ""),
|
||||||
("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0, ""),
|
("naive_raptor_false", "naive", {"raptor": {"use_raptor": False}}, 0, ""),
|
||||||
@ -329,21 +324,19 @@ class TestAdvancedConfigurations:
|
|||||||
100,
|
100,
|
||||||
"AssertionError('auto_keywords should be in range from 0 to 32')",
|
"AssertionError('auto_keywords should be in range from 0 to 32')",
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_auto_keywords_float",
|
"naive_auto_keywords_float",
|
||||||
"naive",
|
"naive",
|
||||||
{"auto_questions": 3.14},
|
{"auto_keywords": 3.14},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('auto_keywords should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_auto_keywords_str",
|
"naive_auto_keywords_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"auto_keywords": "1024"},
|
{"auto_keywords": "1024"},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('auto_keywords should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"naive_auto_questions_negative",
|
"naive_auto_questions_negative",
|
||||||
@ -359,21 +352,19 @@ class TestAdvancedConfigurations:
|
|||||||
100,
|
100,
|
||||||
"AssertionError('auto_questions should be in range from 0 to 10')",
|
"AssertionError('auto_questions should be in range from 0 to 10')",
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_auto_questions_float",
|
"naive_auto_questions_float",
|
||||||
"naive",
|
"naive",
|
||||||
{"auto_questions": 3.14},
|
{"auto_questions": 3.14},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('auto_questions should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_auto_questions_str",
|
"naive_auto_questions_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"auto_questions": "1024"},
|
{"auto_questions": "1024"},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('auto_questions should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"naive_topn_tags_negative",
|
"naive_topn_tags_negative",
|
||||||
@ -389,21 +380,19 @@ class TestAdvancedConfigurations:
|
|||||||
100,
|
100,
|
||||||
"AssertionError('topn_tags should be in range from 0 to 10')",
|
"AssertionError('topn_tags should be in range from 0 to 10')",
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_topn_tags_float",
|
"naive_topn_tags_float",
|
||||||
"naive",
|
"naive",
|
||||||
{"topn_tags": 3.14},
|
{"topn_tags": 3.14},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('topn_tags should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
pytest.param(
|
(
|
||||||
"naive_topn_tags_str",
|
"naive_topn_tags_str",
|
||||||
"naive",
|
"naive",
|
||||||
{"topn_tags": "1024"},
|
{"topn_tags": "1024"},
|
||||||
100,
|
100,
|
||||||
"",
|
"AssertionError('topn_tags should be int')",
|
||||||
marks=pytest.mark.xfail(reason="issue#5719"),
|
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user