fix(python-sdk): clarify JSON parse error reporting

This commit is contained in:
Gergő Móricz 2025-01-17 16:16:41 +01:00
parent d5929af010
commit 805bfa457d

View File

@ -120,7 +120,10 @@ class FirecrawlApp:
json=scrape_params, json=scrape_params,
) )
if response.status_code == 200: if response.status_code == 200:
response = response.json() try:
response = response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if response['success'] and 'data' in response: if response['success'] and 'data' in response:
return response['data'] return response['data']
elif "error" in response: elif "error" in response:
@ -159,7 +162,10 @@ class FirecrawlApp:
if response.status_code != 200: if response.status_code != 200:
raise Exception(f"Request failed with status code {response.status_code}") raise Exception(f"Request failed with status code {response.status_code}")
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
def crawl_url(self, url: str, def crawl_url(self, url: str,
params: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None,
@ -194,7 +200,10 @@ class FirecrawlApp:
json_data.update(params) json_data.update(params)
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
id = response.json().get('id') try:
id = response.json().get('id')
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
return self._monitor_job_status(id, headers, poll_interval) return self._monitor_job_status(id, headers, poll_interval)
else: else:
@ -223,7 +232,10 @@ class FirecrawlApp:
json_data.update(params) json_data.update(params)
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
else: else:
self._handle_error(response, 'start crawl job') self._handle_error(response, 'start crawl job')
@ -245,7 +257,10 @@ class FirecrawlApp:
headers = self._prepare_headers() headers = self._prepare_headers()
response = self._get_request(f'{self.api_url}{endpoint}', headers) response = self._get_request(f'{self.api_url}{endpoint}', headers)
if response.status_code == 200: if response.status_code == 200:
status_data = response.json() try:
status_data = response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if status_data['status'] == 'completed': if status_data['status'] == 'completed':
if 'data' in status_data: if 'data' in status_data:
data = status_data['data'] data = status_data['data']
@ -261,7 +276,10 @@ class FirecrawlApp:
if status_response.status_code != 200: if status_response.status_code != 200:
logger.error(f"Failed to fetch next page: {status_response.status_code}") logger.error(f"Failed to fetch next page: {status_response.status_code}")
break break
next_data = status_response.json() try:
next_data = status_response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
data.extend(next_data.get('data', [])) data.extend(next_data.get('data', []))
status_data = next_data status_data = next_data
except Exception as e: except Exception as e:
@ -304,7 +322,10 @@ class FirecrawlApp:
headers = self._prepare_headers() headers = self._prepare_headers()
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers) response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
else: else:
self._handle_error(response, "cancel crawl job") self._handle_error(response, "cancel crawl job")
@ -352,7 +373,10 @@ class FirecrawlApp:
json=json_data, json=json_data,
) )
if response.status_code == 200: if response.status_code == 200:
response = response.json() try:
response = response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if response['success'] and 'links' in response: if response['success'] and 'links' in response:
return response return response
elif 'error' in response: elif 'error' in response:
@ -395,7 +419,10 @@ class FirecrawlApp:
json_data.update(params) json_data.update(params)
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
id = response.json().get('id') try:
id = response.json().get('id')
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
return self._monitor_job_status(id, headers, poll_interval) return self._monitor_job_status(id, headers, poll_interval)
else: else:
@ -424,7 +451,10 @@ class FirecrawlApp:
json_data.update(params) json_data.update(params)
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
else: else:
self._handle_error(response, 'start batch scrape job') self._handle_error(response, 'start batch scrape job')
@ -464,7 +494,10 @@ class FirecrawlApp:
headers = self._prepare_headers() headers = self._prepare_headers()
response = self._get_request(f'{self.api_url}{endpoint}', headers) response = self._get_request(f'{self.api_url}{endpoint}', headers)
if response.status_code == 200: if response.status_code == 200:
status_data = response.json() try:
status_data = response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if status_data['status'] == 'completed': if status_data['status'] == 'completed':
if 'data' in status_data: if 'data' in status_data:
data = status_data['data'] data = status_data['data']
@ -480,7 +513,10 @@ class FirecrawlApp:
if status_response.status_code != 200: if status_response.status_code != 200:
logger.error(f"Failed to fetch next page: {status_response.status_code}") logger.error(f"Failed to fetch next page: {status_response.status_code}")
break break
next_data = status_response.json() try:
next_data = status_response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
data.extend(next_data.get('data', [])) data.extend(next_data.get('data', []))
status_data = next_data status_data = next_data
except Exception as e: except Exception as e:
@ -550,7 +586,10 @@ class FirecrawlApp:
headers headers
) )
if response.status_code == 200: if response.status_code == 200:
data = response.json() try:
data = response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if data['success']: if data['success']:
job_id = data.get('id') job_id = data.get('id')
if not job_id: if not job_id:
@ -563,7 +602,10 @@ class FirecrawlApp:
headers headers
) )
if status_response.status_code == 200: if status_response.status_code == 200:
status_data = status_response.json() try:
status_data = status_response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if status_data['status'] == 'completed': if status_data['status'] == 'completed':
if status_data['success']: if status_data['success']:
return status_data return status_data
@ -601,7 +643,10 @@ class FirecrawlApp:
try: try:
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers) response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
else: else:
self._handle_error(response, "get extract status") self._handle_error(response, "get extract status")
except Exception as e: except Exception as e:
@ -641,7 +686,10 @@ class FirecrawlApp:
try: try:
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers) response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() try:
return response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
else: else:
self._handle_error(response, "async extract") self._handle_error(response, "async extract")
except Exception as e: except Exception as e:
@ -771,16 +819,22 @@ class FirecrawlApp:
status_response = self._get_request(api_url, headers) status_response = self._get_request(api_url, headers)
if status_response.status_code == 200: if status_response.status_code == 200:
status_data = status_response.json() try:
status_data = status_response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
if status_data['status'] == 'completed': if status_data['status'] == 'completed':
if 'data' in status_data: if 'data' in status_data:
data = status_data['data'] data = status_data['data']
while 'next' in status_data: while 'next' in status_data:
if len(status_data['data']) == 0: if len(status_data['data']) == 0:
break break
status_response = self._get_request(status_data['next'], headers) status_response = self._get_request(status_data['next'], headers)
status_data = status_response.json() try:
data.extend(status_data.get('data', [])) status_data = status_response.json()
except:
raise Exception(f'Failed to parse Firecrawl response as JSON.')
data.extend(status_data.get('data', []))
status_data['data'] = data status_data['data'] = data
return status_data return status_data
else: else:
@ -804,8 +858,12 @@ class FirecrawlApp:
Raises: Raises:
Exception: An exception with a message containing the status code and error details from the response. Exception: An exception with a message containing the status code and error details from the response.
""" """
error_message = response.json().get('error', 'No error message provided.') try:
error_details = response.json().get('details', 'No additional error details provided.') error_message = response.json().get('error', 'No error message provided.')
error_details = response.json().get('details', 'No additional error details provided.')
except:
raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
if response.status_code == 402: if response.status_code == 402:
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"