Fix JSON file parsing error (#3829)

### What problem does this PR solve?

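Closes issue #3828.

`RAGFlowJsonParser` now passes explicit `None` values for the second and third arguments of `_json_split` in both the `convert_lists` branch and the default branch. In `chunk()`, `chunk_token_num` is read from `parser_config` into a local variable before `JsonParser` is constructed. See the linked issue for the failure this addresses.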

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: jinhai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai 2024-12-03 19:02:03 +08:00 committed by GitHub
parent 54980337e4
commit 821fdf02b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 3 deletions

View File

@ -92,9 +92,9 @@ class RAGFlowJsonParser:
"""Splits JSON into a list of JSON chunks"""
if convert_lists:
chunks = self._json_split(self._list_to_dict_preprocessing(json_data))
chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None)
else:
chunks = self._json_split(json_data)
chunks = self._json_split(json_data, None, None)
# Remove the last chunk if it's empty
if not chunks[-1]:

View File

@ -258,7 +258,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
elif re.search(r"\.json$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections = JsonParser(int(parser_config.get("chunk_token_num", 128)))(binary)
chunk_token_num = int(parser_config.get("chunk_token_num", 128))
sections = JsonParser(chunk_token_num)(binary)
sections = [(_, "") for _ in sections if _]
callback(0.8, "Finish parsing.")