mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-03 05:40:38 +08:00
fix(python-sdk): add JSON parse error reporting clarity
This commit is contained in:
parent
d5929af010
commit
805bfa457d
@ -120,7 +120,10 @@ class FirecrawlApp:
|
|||||||
json=scrape_params,
|
json=scrape_params,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
response = response.json()
|
try:
|
||||||
|
response = response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if response['success'] and 'data' in response:
|
if response['success'] and 'data' in response:
|
||||||
return response['data']
|
return response['data']
|
||||||
elif "error" in response:
|
elif "error" in response:
|
||||||
@ -159,7 +162,10 @@ class FirecrawlApp:
|
|||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise Exception(f"Request failed with status code {response.status_code}")
|
raise Exception(f"Request failed with status code {response.status_code}")
|
||||||
|
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
|
|
||||||
def crawl_url(self, url: str,
|
def crawl_url(self, url: str,
|
||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
@ -194,7 +200,10 @@ class FirecrawlApp:
|
|||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
id = response.json().get('id')
|
try:
|
||||||
|
id = response.json().get('id')
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
return self._monitor_job_status(id, headers, poll_interval)
|
return self._monitor_job_status(id, headers, poll_interval)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -223,7 +232,10 @@ class FirecrawlApp:
|
|||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'start crawl job')
|
self._handle_error(response, 'start crawl job')
|
||||||
|
|
||||||
@ -245,7 +257,10 @@ class FirecrawlApp:
|
|||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
status_data = response.json()
|
try:
|
||||||
|
status_data = response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if status_data['status'] == 'completed':
|
if status_data['status'] == 'completed':
|
||||||
if 'data' in status_data:
|
if 'data' in status_data:
|
||||||
data = status_data['data']
|
data = status_data['data']
|
||||||
@ -261,7 +276,10 @@ class FirecrawlApp:
|
|||||||
if status_response.status_code != 200:
|
if status_response.status_code != 200:
|
||||||
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
||||||
break
|
break
|
||||||
next_data = status_response.json()
|
try:
|
||||||
|
next_data = status_response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
data.extend(next_data.get('data', []))
|
data.extend(next_data.get('data', []))
|
||||||
status_data = next_data
|
status_data = next_data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -304,7 +322,10 @@ class FirecrawlApp:
|
|||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
|
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, "cancel crawl job")
|
self._handle_error(response, "cancel crawl job")
|
||||||
|
|
||||||
@ -352,7 +373,10 @@ class FirecrawlApp:
|
|||||||
json=json_data,
|
json=json_data,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
response = response.json()
|
try:
|
||||||
|
response = response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if response['success'] and 'links' in response:
|
if response['success'] and 'links' in response:
|
||||||
return response
|
return response
|
||||||
elif 'error' in response:
|
elif 'error' in response:
|
||||||
@ -395,7 +419,10 @@ class FirecrawlApp:
|
|||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
id = response.json().get('id')
|
try:
|
||||||
|
id = response.json().get('id')
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
return self._monitor_job_status(id, headers, poll_interval)
|
return self._monitor_job_status(id, headers, poll_interval)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -424,7 +451,10 @@ class FirecrawlApp:
|
|||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'start batch scrape job')
|
self._handle_error(response, 'start batch scrape job')
|
||||||
|
|
||||||
@ -464,7 +494,10 @@ class FirecrawlApp:
|
|||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
status_data = response.json()
|
try:
|
||||||
|
status_data = response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if status_data['status'] == 'completed':
|
if status_data['status'] == 'completed':
|
||||||
if 'data' in status_data:
|
if 'data' in status_data:
|
||||||
data = status_data['data']
|
data = status_data['data']
|
||||||
@ -480,7 +513,10 @@ class FirecrawlApp:
|
|||||||
if status_response.status_code != 200:
|
if status_response.status_code != 200:
|
||||||
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
||||||
break
|
break
|
||||||
next_data = status_response.json()
|
try:
|
||||||
|
next_data = status_response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
data.extend(next_data.get('data', []))
|
data.extend(next_data.get('data', []))
|
||||||
status_data = next_data
|
status_data = next_data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -550,7 +586,10 @@ class FirecrawlApp:
|
|||||||
headers
|
headers
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if data['success']:
|
if data['success']:
|
||||||
job_id = data.get('id')
|
job_id = data.get('id')
|
||||||
if not job_id:
|
if not job_id:
|
||||||
@ -563,7 +602,10 @@ class FirecrawlApp:
|
|||||||
headers
|
headers
|
||||||
)
|
)
|
||||||
if status_response.status_code == 200:
|
if status_response.status_code == 200:
|
||||||
status_data = status_response.json()
|
try:
|
||||||
|
status_data = status_response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if status_data['status'] == 'completed':
|
if status_data['status'] == 'completed':
|
||||||
if status_data['success']:
|
if status_data['success']:
|
||||||
return status_data
|
return status_data
|
||||||
@ -601,7 +643,10 @@ class FirecrawlApp:
|
|||||||
try:
|
try:
|
||||||
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
|
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, "get extract status")
|
self._handle_error(response, "get extract status")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -641,7 +686,10 @@ class FirecrawlApp:
|
|||||||
try:
|
try:
|
||||||
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
|
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, "async extract")
|
self._handle_error(response, "async extract")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -771,16 +819,22 @@ class FirecrawlApp:
|
|||||||
|
|
||||||
status_response = self._get_request(api_url, headers)
|
status_response = self._get_request(api_url, headers)
|
||||||
if status_response.status_code == 200:
|
if status_response.status_code == 200:
|
||||||
status_data = status_response.json()
|
try:
|
||||||
|
status_data = status_response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if status_data['status'] == 'completed':
|
if status_data['status'] == 'completed':
|
||||||
if 'data' in status_data:
|
if 'data' in status_data:
|
||||||
data = status_data['data']
|
data = status_data['data']
|
||||||
while 'next' in status_data:
|
while 'next' in status_data:
|
||||||
if len(status_data['data']) == 0:
|
if len(status_data['data']) == 0:
|
||||||
break
|
break
|
||||||
status_response = self._get_request(status_data['next'], headers)
|
status_response = self._get_request(status_data['next'], headers)
|
||||||
status_data = status_response.json()
|
try:
|
||||||
data.extend(status_data.get('data', []))
|
status_data = status_response.json()
|
||||||
|
except:
|
||||||
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
|
data.extend(status_data.get('data', []))
|
||||||
status_data['data'] = data
|
status_data['data'] = data
|
||||||
return status_data
|
return status_data
|
||||||
else:
|
else:
|
||||||
@ -804,8 +858,12 @@ class FirecrawlApp:
|
|||||||
Raises:
|
Raises:
|
||||||
Exception: An exception with a message containing the status code and error details from the response.
|
Exception: An exception with a message containing the status code and error details from the response.
|
||||||
"""
|
"""
|
||||||
error_message = response.json().get('error', 'No error message provided.')
|
try:
|
||||||
error_details = response.json().get('details', 'No additional error details provided.')
|
error_message = response.json().get('error', 'No error message provided.')
|
||||||
|
error_details = response.json().get('details', 'No additional error details provided.')
|
||||||
|
except:
|
||||||
|
raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
|
||||||
|
|
||||||
|
|
||||||
if response.status_code == 402:
|
if response.status_code == 402:
|
||||||
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user