From 97695dd55b987b12641739da20872e7e92e15eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= Date: Fri, 14 Mar 2025 19:53:57 +0000 Subject: [PATCH] refactor: dry request and error handling --- apps/python-sdk/firecrawl/firecrawl.py | 209 +++++++++++++++---------- 1 file changed, 123 insertions(+), 86 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 3bc1aa9b..d62312c6 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1624,21 +1624,35 @@ class FirecrawlApp: except: raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response) - - if response.status_code == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status_code == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status_code == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status_code == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message} - {error_details}" + message = self._get_error_message(response.status_code, action, error_message, error_details) # Raise an HTTPError with the custom message and attach the response raise requests.exceptions.HTTPError(message, response=response) + def _get_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code.
+ + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + if status_code == 402: + return f"Payment Required: Failed to {action}. {error_message} - {error_details}" + elif status_code == 408: + return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" + elif status_code == 409: + return f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" + elif status_code == 500: + return f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" + else: + return f"Unexpected error during {action}: Status code {status_code}. {error_message} - {error_details}" + def deep_research( self, query: str, @@ -1905,86 +1919,96 @@ class AsyncFirecrawlApp(FirecrawlApp): Asynchronous version of FirecrawlApp that implements async methods using aiohttp. Provides non-blocking alternatives to all FirecrawlApp operations. """ - - async def _async_post_request( + + async def _async_request( self, + method: str, url: str, - data: Dict[str, Any], headers: Dict[str, str], + data: Optional[Dict[str, Any]] = None, retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: """ + Generic async request method with exponential backoff retry logic. + + Args: + method (str): The HTTP method to use (e.g., "GET" or "POST"). + url (str): The URL to send the request to. + headers (Dict[str, str]): Headers to include in the request. + data (Optional[Dict[str, Any]]): The JSON data to include in the request body (only for POST requests). + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). 
+ + Returns: + Dict[str, Any]: The parsed JSON response from the server. + + Raises: + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. + """ + async with aiohttp.ClientSession() as session: + for attempt in range(retries): + try: + async with session.request( + method=method, url=url, headers=headers, json=data + ) as response: + if response.status == 502: + await asyncio.sleep(backoff_factor * (2 ** attempt)) + continue + if response.status >= 300: + await self._handle_error(response, f"make {method} request") + return await response.json() + except aiohttp.ClientError as e: + if attempt == retries - 1: + raise e + await asyncio.sleep(backoff_factor * (2 ** attempt)) + raise Exception("Max retries exceeded") + + async def _async_post_request( + self, url: str, data: Dict[str, Any], headers: Dict[str, str], + retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: + """ Make an async POST request with exponential backoff retry logic. Args: - url (str): The URL to send the POST request to - data (Dict[str, Any]): The JSON data to include in the request body - headers (Dict[str, str]): Headers to include in the request - retries (int): Maximum number of retry attempts (default: 3) - backoff_factor (float): Factor to calculate delay between retries (default: 0.5) - Delay will be backoff_factor * (2 ** retry_count) + url (str): The URL to send the POST request to. + data (Dict[str, Any]): The JSON data to include in the request body. + headers (Dict[str, str]): Headers to include in the request. + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). Returns: - Dict[str, Any]: The parsed JSON response from the server + Dict[str, Any]: The parsed JSON response from the server. 
Raises: - aiohttp.ClientError: If the request fails after all retries - Exception: If max retries are exceeded or other errors occur + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. """ - async with aiohttp.ClientSession() as session: - for attempt in range(retries): - try: - async with session.post(url, headers=headers, json=data) as response: - if response.status == 502: - await asyncio.sleep(backoff_factor * (2 ** attempt)) - continue - if response.status >= 300: - await self._handle_error(response, "make POST request") - return await response.json() - except aiohttp.ClientError as e: - if attempt == retries - 1: - raise e - await asyncio.sleep(backoff_factor * (2 ** attempt)) - raise Exception("Max retries exceeded") + return await self._async_request("POST", url, headers, data, retries, backoff_factor) async def _async_get_request( - self, - url: str, - headers: Dict[str, str], - retries: int = 3, - backoff_factor: float = 0.5) -> Dict[str, Any]: + self, url: str, headers: Dict[str, str], + retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: """ Make an async GET request with exponential backoff retry logic. Args: - url (str): The URL to send the GET request to - headers (Dict[str, str]): Headers to include in the request - retries (int): Maximum number of retry attempts (default: 3) - backoff_factor (float): Factor to calculate delay between retries (default: 0.5) - Delay will be backoff_factor * (2 ** retry_count) + url (str): The URL to send the GET request to. + headers (Dict[str, str]): Headers to include in the request. + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). Returns: - Dict[str, Any]: The parsed JSON response from the server + Dict[str, Any]: The parsed JSON response from the server. 
Raises: - aiohttp.ClientError: If the request fails after all retries - Exception: If max retries are exceeded or other errors occur + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. """ - async with aiohttp.ClientSession() as session: - for attempt in range(retries): - try: - async with session.get(url, headers=headers) as response: - if response.status == 502: - await asyncio.sleep(backoff_factor * (2 ** attempt)) - continue - if response.status >= 300: # Accept any 2xx status code as success - await self._handle_error(response, "make GET request") - return await response.json() - except aiohttp.ClientError as e: - if attempt == retries - 1: - raise e - await asyncio.sleep(backoff_factor * (2 ** attempt)) - raise Exception("Max retries exceeded") + return await self._async_request("GET", url, headers, None, retries, backoff_factor) async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None: """ @@ -2009,19 +2033,25 @@ class AsyncFirecrawlApp(FirecrawlApp): except: raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}') - if response.status == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status}. 
{error_message} - {error_details}" + message = await self._get_async_error_message(response.status, action, error_message, error_details) raise aiohttp.ClientError(message) + async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code for async operations. + + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + return self._get_error_message(status_code, action, error_message, error_details) + async def crawl_url_and_watch( self, url: str, @@ -3248,15 +3278,22 @@ class AsyncCrawlWatcher(CrawlWatcher): except: raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}') - if response.status == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status}. 
{error_message} - {error_details}" + # Use the app's method to get the error message + message = await self.app._get_async_error_message(response.status, action, error_message, error_details) raise aiohttp.ClientError(message) + + async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code for async operations. + + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + return self.app._get_error_message(status_code, action, error_message, error_details)