mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-06-04 11:14:10 +08:00
fix: document truncation and loss in notion document sync (#5631)
Co-authored-by: Aurelius Huang <cm.huang@aftership.com>
This commit is contained in:
parent
f8aaa57f31
commit
f546db5437
@@ -140,11 +140,10 @@ class NotionExtractor(BaseExtractor):
|
||||
|
||||
def _get_notion_block_data(self, page_id: str) -> list[str]:
|
||||
result_lines_arr = []
|
||||
cur_block_id = page_id
|
||||
start_cursor = None
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=page_id)
|
||||
while True:
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id)
|
||||
query_dict: dict[str, Any] = {}
|
||||
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor}
|
||||
res = requests.request(
|
||||
"GET",
|
||||
block_url,
|
||||
@@ -153,7 +152,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Content-Type": "application/json",
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
json=query_dict
|
||||
params=query_dict
|
||||
)
|
||||
data = res.json()
|
||||
for result in data["results"]:
|
||||
@@ -191,16 +190,16 @@ class NotionExtractor(BaseExtractor):
|
||||
if data["next_cursor"] is None:
|
||||
break
|
||||
else:
|
||||
cur_block_id = data["next_cursor"]
|
||||
start_cursor = data["next_cursor"]
|
||||
return result_lines_arr
|
||||
|
||||
def _read_block(self, block_id: str, num_tabs: int = 0) -> str:
|
||||
"""Read a block."""
|
||||
result_lines_arr = []
|
||||
cur_block_id = block_id
|
||||
start_cursor = None
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id)
|
||||
while True:
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id)
|
||||
query_dict: dict[str, Any] = {}
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor}
|
||||
|
||||
res = requests.request(
|
||||
"GET",
|
||||
@@ -210,7 +209,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Content-Type": "application/json",
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
json=query_dict
|
||||
params=query_dict
|
||||
)
|
||||
data = res.json()
|
||||
if 'results' not in data or data["results"] is None:
|
||||
@@ -249,7 +248,7 @@ class NotionExtractor(BaseExtractor):
|
||||
if data["next_cursor"] is None:
|
||||
break
|
||||
else:
|
||||
cur_block_id = data["next_cursor"]
|
||||
start_cursor = data["next_cursor"]
|
||||
|
||||
result_lines = "\n".join(result_lines_arr)
|
||||
return result_lines
|
||||
@@ -258,10 +257,10 @@ class NotionExtractor(BaseExtractor):
|
||||
"""Read table rows."""
|
||||
done = False
|
||||
result_lines_arr = []
|
||||
cur_block_id = block_id
|
||||
start_cursor = None
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id)
|
||||
while not done:
|
||||
block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id)
|
||||
query_dict: dict[str, Any] = {}
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor}
|
||||
|
||||
res = requests.request(
|
||||
"GET",
|
||||
@@ -271,7 +270,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Content-Type": "application/json",
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
json=query_dict
|
||||
params=query_dict
|
||||
)
|
||||
data = res.json()
|
||||
# get table headers text
|
||||
@@ -300,7 +299,7 @@ class NotionExtractor(BaseExtractor):
|
||||
done = True
|
||||
break
|
||||
else:
|
||||
cur_block_id = data["next_cursor"]
|
||||
start_cursor = data["next_cursor"]
|
||||
|
||||
result_lines = "\n".join(result_lines_arr)
|
||||
return result_lines
|
||||
|
Loading…
x
Reference in New Issue
Block a user