From 821fdf02b4fc1798e7a7179abc7b691f4d2fabbb Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Tue, 3 Dec 2024 19:02:03 +0800 Subject: [PATCH] Fix parsing JSON file error (#3829) ### What problem does this PR solve? Close issue: #3828 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Signed-off-by: jinhai --- deepdoc/parser/json_parser.py | 4 ++-- rag/app/naive.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/deepdoc/parser/json_parser.py b/deepdoc/parser/json_parser.py index 2c05614bf..1dd620d44 100644 --- a/deepdoc/parser/json_parser.py +++ b/deepdoc/parser/json_parser.py @@ -92,9 +92,9 @@ class RAGFlowJsonParser: """Splits JSON into a list of JSON chunks""" if convert_lists: - chunks = self._json_split(self._list_to_dict_preprocessing(json_data)) + chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None) else: - chunks = self._json_split(json_data) + chunks = self._json_split(json_data, None, None) # Remove the last chunk if it's empty if not chunks[-1]: diff --git a/rag/app/naive.py b/rag/app/naive.py index 6983582cf..f9f1ca879 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -258,7 +258,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, elif re.search(r"\.json$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") - sections = JsonParser(int(parser_config.get("chunk_token_num", 128)))(binary) + chunk_token_num = int(parser_config.get("chunk_token_num", 128)) + sections = JsonParser(chunk_token_num)(binary) sections = [(_, "") for _ in sections if _] callback(0.8, "Finish parsing.")