mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 05:19:06 +08:00
refactor: DRY request and error handling
This commit is contained in:
parent
e7db5a2d5b
commit
97695dd55b
@ -1624,21 +1624,35 @@ class FirecrawlApp:
|
|||||||
except:
|
except:
|
||||||
raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
|
raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
|
||||||
|
|
||||||
|
message = self._get_error_message(response.status_code, action, error_message, error_details)
|
||||||
if response.status_code == 402:
|
|
||||||
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
|
||||||
elif response.status_code == 408:
|
|
||||||
message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
|
|
||||||
elif response.status_code == 409:
|
|
||||||
message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
|
|
||||||
elif response.status_code == 500:
|
|
||||||
message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
|
|
||||||
else:
|
|
||||||
message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message} - {error_details}"
|
|
||||||
|
|
||||||
# Raise an HTTPError with the custom message and attach the response
|
# Raise an HTTPError with the custom message and attach the response
|
||||||
raise requests.exceptions.HTTPError(message, response=response)
|
raise requests.exceptions.HTTPError(message, response=response)
|
||||||
|
|
||||||
|
def _get_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
|
||||||
|
"""
|
||||||
|
Generate a standardized error message based on HTTP status code.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status_code (int): The HTTP status code from the response
|
||||||
|
action (str): Description of the action that was being performed
|
||||||
|
error_message (str): The error message from the API response
|
||||||
|
error_details (str): Additional error details from the API response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: A formatted error message
|
||||||
|
"""
|
||||||
|
if status_code == 402:
|
||||||
|
return f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
||||||
|
elif status_code == 408:
|
||||||
|
return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
|
||||||
|
elif status_code == 409:
|
||||||
|
return f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
|
||||||
|
elif status_code == 500:
|
||||||
|
return f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
|
||||||
|
else:
|
||||||
|
return f"Unexpected error during {action}: Status code {status_code}. {error_message} - {error_details}"
|
||||||
|
|
||||||
def deep_research(
|
def deep_research(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -1906,40 +1920,44 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|||||||
Provides non-blocking alternatives to all FirecrawlApp operations.
|
Provides non-blocking alternatives to all FirecrawlApp operations.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def _async_post_request(
|
async def _async_request(
|
||||||
self,
|
self,
|
||||||
|
method: str,
|
||||||
url: str,
|
url: str,
|
||||||
data: Dict[str, Any],
|
|
||||||
headers: Dict[str, str],
|
headers: Dict[str, str],
|
||||||
|
data: Optional[Dict[str, Any]] = None,
|
||||||
retries: int = 3,
|
retries: int = 3,
|
||||||
backoff_factor: float = 0.5) -> Dict[str, Any]:
|
backoff_factor: float = 0.5) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Make an async POST request with exponential backoff retry logic.
|
Generic async request method with exponential backoff retry logic.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The URL to send the POST request to
|
method (str): The HTTP method to use (e.g., "GET" or "POST").
|
||||||
data (Dict[str, Any]): The JSON data to include in the request body
|
url (str): The URL to send the request to.
|
||||||
headers (Dict[str, str]): Headers to include in the request
|
headers (Dict[str, str]): Headers to include in the request.
|
||||||
retries (int): Maximum number of retry attempts (default: 3)
|
data (Optional[Dict[str, Any]]): The JSON data to include in the request body (only for POST requests).
|
||||||
backoff_factor (float): Factor to calculate delay between retries (default: 0.5)
|
retries (int): Maximum number of retry attempts (default: 3).
|
||||||
Delay will be backoff_factor * (2 ** retry_count)
|
backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
|
||||||
|
Delay will be backoff_factor * (2 ** retry_count).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[str, Any]: The parsed JSON response from the server
|
Dict[str, Any]: The parsed JSON response from the server.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
aiohttp.ClientError: If the request fails after all retries
|
aiohttp.ClientError: If the request fails after all retries.
|
||||||
Exception: If max retries are exceeded or other errors occur
|
Exception: If max retries are exceeded or other errors occur.
|
||||||
"""
|
"""
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
async with session.post(url, headers=headers, json=data) as response:
|
async with session.request(
|
||||||
|
method=method, url=url, headers=headers, json=data
|
||||||
|
) as response:
|
||||||
if response.status == 502:
|
if response.status == 502:
|
||||||
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
||||||
continue
|
continue
|
||||||
if response.status >= 300:
|
if response.status >= 300:
|
||||||
await self._handle_error(response, "make POST request")
|
await self._handle_error(response, f"make {method} request")
|
||||||
return await response.json()
|
return await response.json()
|
||||||
except aiohttp.ClientError as e:
|
except aiohttp.ClientError as e:
|
||||||
if attempt == retries - 1:
|
if attempt == retries - 1:
|
||||||
@ -1947,44 +1965,50 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|||||||
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
||||||
raise Exception("Max retries exceeded")
|
raise Exception("Max retries exceeded")
|
||||||
|
|
||||||
|
async def _async_post_request(
|
||||||
|
self, url: str, data: Dict[str, Any], headers: Dict[str, str],
|
||||||
|
retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Make an async POST request with exponential backoff retry logic.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str): The URL to send the POST request to.
|
||||||
|
data (Dict[str, Any]): The JSON data to include in the request body.
|
||||||
|
headers (Dict[str, str]): Headers to include in the request.
|
||||||
|
retries (int): Maximum number of retry attempts (default: 3).
|
||||||
|
backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
|
||||||
|
Delay will be backoff_factor * (2 ** retry_count).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: The parsed JSON response from the server.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
aiohttp.ClientError: If the request fails after all retries.
|
||||||
|
Exception: If max retries are exceeded or other errors occur.
|
||||||
|
"""
|
||||||
|
return await self._async_request("POST", url, headers, data, retries, backoff_factor)
|
||||||
|
|
||||||
async def _async_get_request(
|
async def _async_get_request(
|
||||||
self,
|
self, url: str, headers: Dict[str, str],
|
||||||
url: str,
|
retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]:
|
||||||
headers: Dict[str, str],
|
|
||||||
retries: int = 3,
|
|
||||||
backoff_factor: float = 0.5) -> Dict[str, Any]:
|
|
||||||
"""
|
"""
|
||||||
Make an async GET request with exponential backoff retry logic.
|
Make an async GET request with exponential backoff retry logic.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The URL to send the GET request to
|
url (str): The URL to send the GET request to.
|
||||||
headers (Dict[str, str]): Headers to include in the request
|
headers (Dict[str, str]): Headers to include in the request.
|
||||||
retries (int): Maximum number of retry attempts (default: 3)
|
retries (int): Maximum number of retry attempts (default: 3).
|
||||||
backoff_factor (float): Factor to calculate delay between retries (default: 0.5)
|
backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
|
||||||
Delay will be backoff_factor * (2 ** retry_count)
|
Delay will be backoff_factor * (2 ** retry_count).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[str, Any]: The parsed JSON response from the server
|
Dict[str, Any]: The parsed JSON response from the server.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
aiohttp.ClientError: If the request fails after all retries
|
aiohttp.ClientError: If the request fails after all retries.
|
||||||
Exception: If max retries are exceeded or other errors occur
|
Exception: If max retries are exceeded or other errors occur.
|
||||||
"""
|
"""
|
||||||
async with aiohttp.ClientSession() as session:
|
return await self._async_request("GET", url, headers, None, retries, backoff_factor)
|
||||||
for attempt in range(retries):
|
|
||||||
try:
|
|
||||||
async with session.get(url, headers=headers) as response:
|
|
||||||
if response.status == 502:
|
|
||||||
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
|
||||||
continue
|
|
||||||
if response.status >= 300: # Accept any 2xx status code as success
|
|
||||||
await self._handle_error(response, "make GET request")
|
|
||||||
return await response.json()
|
|
||||||
except aiohttp.ClientError as e:
|
|
||||||
if attempt == retries - 1:
|
|
||||||
raise e
|
|
||||||
await asyncio.sleep(backoff_factor * (2 ** attempt))
|
|
||||||
raise Exception("Max retries exceeded")
|
|
||||||
|
|
||||||
async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None:
|
async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None:
|
||||||
"""
|
"""
|
||||||
@ -2009,19 +2033,25 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|||||||
except:
|
except:
|
||||||
raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
|
raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
|
||||||
|
|
||||||
if response.status == 402:
|
message = await self._get_async_error_message(response.status, action, error_message, error_details)
|
||||||
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
|
||||||
elif response.status == 408:
|
|
||||||
message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
|
|
||||||
elif response.status == 409:
|
|
||||||
message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
|
|
||||||
elif response.status == 500:
|
|
||||||
message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
|
|
||||||
else:
|
|
||||||
message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}"
|
|
||||||
|
|
||||||
raise aiohttp.ClientError(message)
|
raise aiohttp.ClientError(message)
|
||||||
|
|
||||||
|
async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
|
||||||
|
"""
|
||||||
|
Generate a standardized error message based on HTTP status code for async operations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status_code (int): The HTTP status code from the response
|
||||||
|
action (str): Description of the action that was being performed
|
||||||
|
error_message (str): The error message from the API response
|
||||||
|
error_details (str): Additional error details from the API response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: A formatted error message
|
||||||
|
"""
|
||||||
|
return self._get_error_message(status_code, action, error_message, error_details)
|
||||||
|
|
||||||
async def crawl_url_and_watch(
|
async def crawl_url_and_watch(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
@ -3248,15 +3278,22 @@ class AsyncCrawlWatcher(CrawlWatcher):
|
|||||||
except:
|
except:
|
||||||
raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
|
raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
|
||||||
|
|
||||||
if response.status == 402:
|
# Use the app's method to get the error message
|
||||||
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
message = await self.app._get_async_error_message(response.status, action, error_message, error_details)
|
||||||
elif response.status == 408:
|
|
||||||
message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
|
|
||||||
elif response.status == 409:
|
|
||||||
message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
|
|
||||||
elif response.status == 500:
|
|
||||||
message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
|
|
||||||
else:
|
|
||||||
message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}"
|
|
||||||
|
|
||||||
raise aiohttp.ClientError(message)
|
raise aiohttp.ClientError(message)
|
||||||
|
|
||||||
|
async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
|
||||||
|
"""
|
||||||
|
Generate a standardized error message based on HTTP status code for async operations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status_code (int): The HTTP status code from the response
|
||||||
|
action (str): Description of the action that was being performed
|
||||||
|
error_message (str): The error message from the API response
|
||||||
|
error_details (str): Additional error details from the API response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: A formatted error message
|
||||||
|
"""
|
||||||
|
return self._get_error_message(status_code, action, error_message, error_details)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user