From 805bfa457dc54b33335c2417eff7ca985c243982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 17 Jan 2025 16:16:41 +0100 Subject: [PATCH] fix(python-sdk): add JSON parse error reporting clarity --- apps/python-sdk/firecrawl/firecrawl.py | 106 +++++++++++++++++++------ 1 file changed, 82 insertions(+), 24 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 41f8badf..94539c2a 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -120,7 +120,10 @@ class FirecrawlApp: json=scrape_params, ) if response.status_code == 200: - response = response.json() + try: + response = response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if response['success'] and 'data' in response: return response['data'] elif "error" in response: @@ -159,7 +162,10 @@ class FirecrawlApp: if response.status_code != 200: raise Exception(f"Request failed with status code {response.status_code}") - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') def crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, @@ -194,7 +200,10 @@ class FirecrawlApp: json_data.update(params) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: - id = response.json().get('id') + try: + id = response.json().get('id') + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') return self._monitor_job_status(id, headers, poll_interval) else: @@ -223,7 +232,10 @@ class FirecrawlApp: json_data.update(params) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, 'start crawl job') @@ -245,7 +257,10 @@ class FirecrawlApp: headers = self._prepare_headers() response = self._get_request(f'{self.api_url}{endpoint}', headers) if response.status_code == 200: - status_data = response.json() + try: + status_data = response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': if 'data' in status_data: data = status_data['data'] @@ -261,7 +276,10 @@ class FirecrawlApp: if status_response.status_code != 200: logger.error(f"Failed to fetch next page: {status_response.status_code}") break - next_data = status_response.json() + try: + next_data = status_response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') data.extend(next_data.get('data', [])) status_data = next_data except Exception as e: @@ -304,7 +322,10 @@ class FirecrawlApp: headers = self._prepare_headers() response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers) if response.status_code == 200: - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, "cancel crawl job") @@ -352,7 +373,10 @@ class FirecrawlApp: json=json_data, ) if response.status_code == 200: - response = response.json() + try: + response = response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if response['success'] and 'links' in response: return response elif 'error' in response: @@ -395,7 +419,10 @@ class FirecrawlApp: json_data.update(params) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: - id = response.json().get('id') + try: + id = response.json().get('id') + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') return self._monitor_job_status(id, headers, poll_interval) else: @@ -424,7 +451,10 @@ class FirecrawlApp: json_data.update(params) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, 'start batch scrape job') @@ -464,7 +494,10 @@ class FirecrawlApp: headers = self._prepare_headers() response = self._get_request(f'{self.api_url}{endpoint}', headers) if response.status_code == 200: - status_data = response.json() + try: + status_data = response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': if 'data' in status_data: data = status_data['data'] @@ -480,7 +513,10 @@ class FirecrawlApp: if status_response.status_code != 200: logger.error(f"Failed to fetch next page: {status_response.status_code}") break - next_data = status_response.json() + try: + next_data = status_response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') data.extend(next_data.get('data', [])) status_data = next_data except Exception as e: @@ -550,7 +586,10 @@ class FirecrawlApp: headers ) if response.status_code == 200: - data = response.json() + try: + data = response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if data['success']: job_id = data.get('id') if not job_id: @@ -563,7 +602,10 @@ class FirecrawlApp: headers ) if status_response.status_code == 200: - status_data = status_response.json() + try: + status_data = status_response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': if status_data['success']: return status_data @@ -601,7 +643,10 @@ class FirecrawlApp: try: response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers) if response.status_code == 200: - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, "get extract status") except Exception as e: @@ -641,7 +686,10 @@ class FirecrawlApp: try: response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers) if response.status_code == 200: - return response.json() + try: + return response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, "async extract") except Exception as e: @@ -771,16 +819,22 @@ class FirecrawlApp: status_response = self._get_request(api_url, headers) if status_response.status_code == 200: - status_data = status_response.json() + try: + status_data = status_response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': if 'data' in status_data: data = status_data['data'] while 'next' in status_data: - if len(status_data['data']) == 0: - break - status_response = self._get_request(status_data['next'], headers) - status_data = status_response.json() - data.extend(status_data.get('data', [])) + if len(status_data['data']) == 0: + break + status_response = self._get_request(status_data['next'], headers) + try: + status_data = status_response.json() + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') + data.extend(status_data.get('data', [])) status_data['data'] = data return status_data else: @@ -804,8 +858,12 @@ class FirecrawlApp: Raises: Exception: An exception with a message containing the status code and error details from the response. """ - error_message = response.json().get('error', 'No error message provided.') - error_details = response.json().get('details', 'No additional error details provided.') + try: + error_message = response.json().get('error', 'No error message provided.') + error_details = response.json().get('details', 'No additional error details provided.') + except: + raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response) + if response.status_code == 402: message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"