Fix JSON file parsing (#4004)

### What problem does this PR solve?

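Fix JSON file parsing. This change drops the `dict[str, Any]` annotation from the `data` parameter of `_json_split` and from the `json_data` parameter of `split_json`, and in `split_json` stores the result of `_list_to_dict_preprocessing` in a named variable (`preprocessed_data`) before passing it to `_json_split`.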

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: jinhai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai 2024-12-12 20:34:46 +08:00 committed by GitHub
parent 9ae81b42a3
commit 275b5d14f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4,6 +4,7 @@
import json
from typing import Any
from rag.nlp import find_codec
class RAGFlowJsonParser:
def __init__(
@@ -53,7 +54,7 @@ class RAGFlowJsonParser:
def _json_split(
self,
data: dict[str, Any],
data,
current_path: list[str] | None,
chunks: list[dict] | None,
) -> list[dict]:
@@ -86,13 +87,14 @@ class RAGFlowJsonParser:
def split_json(
self,
json_data: dict[str, Any],
json_data,
convert_lists: bool = False,
) -> list[dict]:
"""Splits JSON into a list of JSON chunks"""
if convert_lists:
chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None)
preprocessed_data = self._list_to_dict_preprocessing(json_data)
chunks = self._json_split(preprocessed_data, None, None)
else:
chunks = self._json_split(json_data, None, None)