diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index 4ec0b4fc38..7c6101010e 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -140,11 +140,10 @@ class NotionExtractor(BaseExtractor): def _get_notion_block_data(self, page_id: str) -> list[str]: result_lines_arr = [] - cur_block_id = page_id + start_cursor = None + block_url = BLOCK_CHILD_URL_TMPL.format(block_id=page_id) while True: - block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) - query_dict: dict[str, Any] = {} - + query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} res = requests.request( "GET", block_url, @@ -153,7 +152,7 @@ class NotionExtractor(BaseExtractor): "Content-Type": "application/json", "Notion-Version": "2022-06-28", }, - json=query_dict + params=query_dict ) data = res.json() for result in data["results"]: @@ -191,16 +190,16 @@ class NotionExtractor(BaseExtractor): if data["next_cursor"] is None: break else: - cur_block_id = data["next_cursor"] + start_cursor = data["next_cursor"] return result_lines_arr def _read_block(self, block_id: str, num_tabs: int = 0) -> str: """Read a block.""" result_lines_arr = [] - cur_block_id = block_id + start_cursor = None + block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id) while True: - block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) - query_dict: dict[str, Any] = {} + query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} res = requests.request( "GET", @@ -210,7 +209,7 @@ class NotionExtractor(BaseExtractor): "Content-Type": "application/json", "Notion-Version": "2022-06-28", }, - json=query_dict + params=query_dict ) data = res.json() if 'results' not in data or data["results"] is None: @@ -249,7 +248,7 @@ class NotionExtractor(BaseExtractor): if data["next_cursor"] is None: break else: - cur_block_id = data["next_cursor"] + start_cursor = data["next_cursor"] result_lines = "\n".join(result_lines_arr) return result_lines @@ -258,10 +257,10 @@ class NotionExtractor(BaseExtractor): """Read table rows.""" done = False result_lines_arr = [] - cur_block_id = block_id + start_cursor = None + block_url = BLOCK_CHILD_URL_TMPL.format(block_id=block_id) while not done: - block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) - query_dict: dict[str, Any] = {} + query_dict: dict[str, Any] = {} if not start_cursor else {'start_cursor': start_cursor} res = requests.request( "GET", @@ -271,7 +270,7 @@ class NotionExtractor(BaseExtractor): "Content-Type": "application/json", "Notion-Version": "2022-06-28", }, - json=query_dict + params=query_dict ) data = res.json() # get table headers text @@ -300,7 +299,7 @@ class NotionExtractor(BaseExtractor): done = True break else: - cur_block_id = data["next_cursor"] + start_cursor = data["next_cursor"] result_lines = "\n".join(result_lines_arr) return result_lines