From 97695dd55b987b12641739da20872e7e92e15eb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= <ademilsonft@outlook.com>
Date: Fri, 14 Mar 2025 19:53:57 +0000
Subject: [PATCH] refator: dry request and error handling

---
 apps/python-sdk/firecrawl/firecrawl.py | 209 +++++++++++++++----------
 1 file changed, 123 insertions(+), 86 deletions(-)

diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index 3bc1aa9b..d62312c6 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -1624,21 +1624,35 @@ class FirecrawlApp:
         except:
             raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
         
-
-        if response.status_code == 402:
-            message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
-        elif response.status_code == 408:
-            message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
-        elif response.status_code == 409:
-            message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
-        elif response.status_code == 500:
-            message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
-        else:
-            message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message} - {error_details}"
+        message = self._get_error_message(response.status_code, action, error_message, error_details)
 
         # Raise an HTTPError with the custom message and attach the response
         raise requests.exceptions.HTTPError(message, response=response)
 
+    def _get_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
+        """
+        Generate a standardized error message based on HTTP status code.
+        
+        Args:
+            status_code (int): The HTTP status code from the response
+            action (str): Description of the action that was being performed
+            error_message (str): The error message from the API response
+            error_details (str): Additional error details from the API response
+            
+        Returns:
+            str: A formatted error message
+        """
+        if status_code == 402:
+            return f"Payment Required: Failed to {action}. {error_message} - {error_details}"
+        elif status_code == 408:
+            return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
+        elif status_code == 409:
+            return f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
+        elif status_code == 500:
+            return f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
+        else:
+            return f"Unexpected error during {action}: Status code {status_code}. {error_message} - {error_details}"
+
     def deep_research(
             self,
             query: str,
@@ -1905,86 +1919,96 @@ class AsyncFirecrawlApp(FirecrawlApp):
     Asynchronous version of FirecrawlApp that implements async methods using aiohttp.
     Provides non-blocking alternatives to all FirecrawlApp operations.
     """
-    
-    async def _async_post_request(
+
+    async def _async_request(
             self,
+            method: str,
             url: str,
-            data: Dict[str, Any],
             headers: Dict[str, str],
+            data: Optional[Dict[str, Any]] = None,
             retries: int = 3,
             backoff_factor: float = 0.5) -> Dict[str, Any]:
         """
+        Generic async request method with exponential backoff retry logic.
+
+        Args:
+            method (str): The HTTP method to use (e.g., "GET" or "POST").
+            url (str): The URL to send the request to.
+            headers (Dict[str, str]): Headers to include in the request.
+            data (Optional[Dict[str, Any]]): The JSON data to include in the request body (only for POST requests).
+            retries (int): Maximum number of retry attempts (default: 3).
+            backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
+                Delay will be backoff_factor * (2 ** retry_count).
+
+        Returns:
+            Dict[str, Any]: The parsed JSON response from the server.
+
+        Raises:
+            aiohttp.ClientError: If the request fails after all retries.
+            Exception: If max retries are exceeded or other errors occur.
+        """
+        async with aiohttp.ClientSession() as session:
+            for attempt in range(retries):
+                try:
+                    async with session.request(
+                        method=method, url=url, headers=headers, json=data
+                    ) as response:
+                        if response.status == 502:
+                            await asyncio.sleep(backoff_factor * (2 ** attempt))
+                            continue
+                        if response.status >= 300:
+                            await self._handle_error(response, f"make {method} request")
+                        return await response.json()
+                except aiohttp.ClientError as e:
+                    if attempt == retries - 1:
+                        raise e
+                    await asyncio.sleep(backoff_factor * (2 ** attempt))
+            raise Exception("Max retries exceeded")
+
+    async def _async_post_request(
+            self, url: str, data: Dict[str, Any], headers: Dict[str, str],
+            retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]:
+        """
         Make an async POST request with exponential backoff retry logic.
 
         Args:
-            url (str): The URL to send the POST request to
-            data (Dict[str, Any]): The JSON data to include in the request body
-            headers (Dict[str, str]): Headers to include in the request
-            retries (int): Maximum number of retry attempts (default: 3)
-            backoff_factor (float): Factor to calculate delay between retries (default: 0.5)
-                Delay will be backoff_factor * (2 ** retry_count)
+            url (str): The URL to send the POST request to.
+            data (Dict[str, Any]): The JSON data to include in the request body.
+            headers (Dict[str, str]): Headers to include in the request.
+            retries (int): Maximum number of retry attempts (default: 3).
+            backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
+                Delay will be backoff_factor * (2 ** retry_count).
 
         Returns:
-            Dict[str, Any]: The parsed JSON response from the server
+            Dict[str, Any]: The parsed JSON response from the server.
 
         Raises:
-            aiohttp.ClientError: If the request fails after all retries
-            Exception: If max retries are exceeded or other errors occur
+            aiohttp.ClientError: If the request fails after all retries.
+            Exception: If max retries are exceeded or other errors occur.
         """
-        async with aiohttp.ClientSession() as session:
-            for attempt in range(retries):
-                try:
-                    async with session.post(url, headers=headers, json=data) as response:
-                        if response.status == 502:
-                            await asyncio.sleep(backoff_factor * (2 ** attempt))
-                            continue
-                        if response.status >= 300: 
-                            await self._handle_error(response, "make POST request")
-                        return await response.json()
-                except aiohttp.ClientError as e:
-                    if attempt == retries - 1:
-                        raise e
-                    await asyncio.sleep(backoff_factor * (2 ** attempt))
-            raise Exception("Max retries exceeded")
+        return await self._async_request("POST", url, headers, data, retries, backoff_factor)
 
     async def _async_get_request(
-            self,
-            url: str,
-            headers: Dict[str, str],
-            retries: int = 3,
-            backoff_factor: float = 0.5) -> Dict[str, Any]:
+            self, url: str, headers: Dict[str, str],
+            retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]:
         """
         Make an async GET request with exponential backoff retry logic.
 
         Args:
-            url (str): The URL to send the GET request to
-            headers (Dict[str, str]): Headers to include in the request
-            retries (int): Maximum number of retry attempts (default: 3)
-            backoff_factor (float): Factor to calculate delay between retries (default: 0.5)
-                Delay will be backoff_factor * (2 ** retry_count)
+            url (str): The URL to send the GET request to.
+            headers (Dict[str, str]): Headers to include in the request.
+            retries (int): Maximum number of retry attempts (default: 3).
+            backoff_factor (float): Factor to calculate delay between retries (default: 0.5).
+                Delay will be backoff_factor * (2 ** retry_count).
 
         Returns:
-            Dict[str, Any]: The parsed JSON response from the server
+            Dict[str, Any]: The parsed JSON response from the server.
 
         Raises:
-            aiohttp.ClientError: If the request fails after all retries
-            Exception: If max retries are exceeded or other errors occur
+            aiohttp.ClientError: If the request fails after all retries.
+            Exception: If max retries are exceeded or other errors occur.
         """
-        async with aiohttp.ClientSession() as session:
-            for attempt in range(retries):
-                try:
-                    async with session.get(url, headers=headers) as response:
-                        if response.status == 502:
-                            await asyncio.sleep(backoff_factor * (2 ** attempt))
-                            continue
-                        if response.status >= 300:  # Accept any 2xx status code as success
-                            await self._handle_error(response, "make GET request")
-                        return await response.json()
-                except aiohttp.ClientError as e:
-                    if attempt == retries - 1:
-                        raise e
-                    await asyncio.sleep(backoff_factor * (2 ** attempt))
-            raise Exception("Max retries exceeded")
+        return await self._async_request("GET", url, headers, None, retries, backoff_factor)
 
     async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None:
         """
@@ -2009,19 +2033,25 @@ class AsyncFirecrawlApp(FirecrawlApp):
         except:
             raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
 
-        if response.status == 402:
-            message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
-        elif response.status == 408:
-            message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
-        elif response.status == 409:
-            message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
-        elif response.status == 500:
-            message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
-        else:
-            message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}"
+        message = await self._get_async_error_message(response.status, action, error_message, error_details)
 
         raise aiohttp.ClientError(message)
 
+    async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
+        """
+        Generate a standardized error message based on HTTP status code for async operations.
+        
+        Args:
+            status_code (int): The HTTP status code from the response
+            action (str): Description of the action that was being performed
+            error_message (str): The error message from the API response
+            error_details (str): Additional error details from the API response
+            
+        Returns:
+            str: A formatted error message
+        """
+        return self._get_error_message(status_code, action, error_message, error_details)
+
     async def crawl_url_and_watch(
             self,
             url: str,
@@ -3248,15 +3278,22 @@ class AsyncCrawlWatcher(CrawlWatcher):
         except:
             raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}')
 
-        if response.status == 402:
-            message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
-        elif response.status == 408:
-            message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
-        elif response.status == 409:
-            message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}"
-        elif response.status == 500:
-            message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}"
-        else:
-            message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}"
+        # Use the app's method to get the error message
+        message = await self.app._get_async_error_message(response.status, action, error_message, error_details)
 
         raise aiohttp.ClientError(message)
+
+    async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str:
+        """
+        Generate a standardized error message based on HTTP status code for async operations.
+        
+        Args:
+            status_code (int): The HTTP status code from the response
+            action (str): Description of the action that was being performed
+            error_message (str): The error message from the API response
+            error_details (str): Additional error details from the API response
+            
+        Returns:
+            str: A formatted error message
+        """
+        return self._get_error_message(status_code, action, error_message, error_details)