From 6a5a4e5b6f435e01d13a42a65bb7bacdaaec9ef0 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:21:35 -0300 Subject: [PATCH 01/26] improv/types-and-comments-descs --- apps/python-sdk/firecrawl/firecrawl.py | 852 +++++++++++++++++++------ 1 file changed, 674 insertions(+), 178 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d79b174c..d212dea7 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -12,8 +12,9 @@ Classes: import logging import os import time -from typing import Any, Dict, Optional, List, Union, Callable +from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic import json +from datetime import datetime import requests import pydantic @@ -21,6 +22,212 @@ import websockets logger : logging.Logger = logging.getLogger("firecrawl") +T = TypeVar('T') + +class FirecrawlDocumentMetadata(pydantic.BaseModel): + """Metadata for a Firecrawl document.""" + title: Optional[str] = None + description: Optional[str] = None + language: Optional[str] = None + keywords: Optional[str] = None + robots: Optional[str] = None + ogTitle: Optional[str] = None + ogDescription: Optional[str] = None + ogUrl: Optional[str] = None + ogImage: Optional[str] = None + ogAudio: Optional[str] = None + ogDeterminer: Optional[str] = None + ogLocale: Optional[str] = None + ogLocaleAlternate: Optional[List[str]] = None + ogSiteName: Optional[str] = None + ogVideo: Optional[str] = None + dctermsCreated: Optional[str] = None + dcDateCreated: Optional[str] = None + dcDate: Optional[str] = None + dctermsType: Optional[str] = None + dcType: Optional[str] = None + dctermsAudience: Optional[str] = None + dctermsSubject: Optional[str] = None + dcSubject: Optional[str] = None + dcDescription: Optional[str] = None + dctermsKeywords: Optional[str] = None + modifiedTime: Optional[str] = None + publishedTime: Optional[str] = None + articleTag: Optional[str] = None + articleSection: Optional[str] = None + sourceURL: Optional[str] = None + statusCode: Optional[int] = None + error: Optional[str] = None + +class ActionsResult(pydantic.BaseModel): + """Result of actions performed during scraping.""" + screenshots: List[str] + +class FirecrawlDocument(pydantic.BaseModel, Generic[T]): + """Document retrieved or processed by Firecrawl.""" + url: Optional[str] = None + markdown: Optional[str] = None + html: Optional[str] = None + rawHtml: Optional[str] = None + links: Optional[List[str]] = None + extract: Optional[T] = None + json: Optional[T] = None + screenshot: Optional[str] = None + metadata: Optional[FirecrawlDocumentMetadata] = None + actions: Optional[ActionsResult] = None + title: Optional[str] = None # v1 search only + description: Optional[str] = None # v1 search only + +class LocationConfig(pydantic.BaseModel): + """Location configuration for scraping.""" + country: Optional[str] = None + languages: Optional[List[str]] = None + +class WebhookConfig(pydantic.BaseModel): + """Configuration for webhooks.""" + url: str + headers: Optional[Dict[str, str]] = None + metadata: Optional[Dict[str, str]] = None + events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None + +class CrawlScrapeOptions(pydantic.BaseModel): + """Parameters for scraping operations.""" + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = 
None + headers: Optional[Dict[str, str]] = None + includeTags: Optional[List[str]] = None + excludeTags: Optional[List[str]] = None + onlyMainContent: Optional[bool] = None + waitFor: Optional[int] = None + timeout: Optional[int] = None + location: Optional[LocationConfig] = None + mobile: Optional[bool] = None + skipTlsVerification: Optional[bool] = None + removeBase64Images: Optional[bool] = None + blockAds: Optional[bool] = None + proxy: Optional[Literal["basic", "stealth"]] = None + +class Action(pydantic.BaseModel): + """Action to perform during scraping.""" + type: Literal["wait", "click", "screenshot", "write", "press", "scroll", "scrape", "executeJavascript"] + milliseconds: Optional[int] = None + selector: Optional[str] = None + fullPage: Optional[bool] = None + text: Optional[str] = None + key: Optional[str] = None + direction: Optional[Literal["up", "down"]] = None + script: Optional[str] = None + +class ExtractConfig(pydantic.BaseModel): + """Configuration for extraction.""" + prompt: Optional[str] = None + schema: Optional[Any] = None + systemPrompt: Optional[str] = None + +class ScrapeParams(CrawlScrapeOptions): + """Parameters for scraping operations.""" + extract: Optional[ExtractConfig] = None + jsonOptions: Optional[ExtractConfig] = None + actions: Optional[List[Action]] = None + +class ScrapeResponse(FirecrawlDocument[T], Generic[T]): + """Response from scraping operations.""" + success: bool = True + warning: Optional[str] = None + error: Optional[str] = None + +class BatchScrapeResponse(pydantic.BaseModel): + """Response from batch scrape operations.""" + id: Optional[str] = None + url: Optional[str] = None + success: bool = True + error: Optional[str] = None + invalidURLs: Optional[List[str]] = None + +class BatchScrapeStatusResponse(pydantic.BaseModel): + """Response from batch scrape status checks.""" + success: bool = True + status: Literal["scraping", "completed", "failed", "cancelled"] + completed: int + total: int + creditsUsed: int + expiresAt: datetime + next: Optional[str] = None + data: List[FirecrawlDocument] + +class CrawlParams(pydantic.BaseModel): + """Parameters for crawling operations.""" + includePaths: Optional[List[str]] = None + excludePaths: Optional[List[str]] = None + maxDepth: Optional[int] = None + maxDiscoveryDepth: Optional[int] = None + limit: Optional[int] = None + allowBackwardLinks: Optional[bool] = None + allowExternalLinks: Optional[bool] = None + ignoreSitemap: Optional[bool] = None + scrapeOptions: Optional[CrawlScrapeOptions] = None + webhook: Optional[Union[str, WebhookConfig]] = None + deduplicateSimilarURLs: Optional[bool] = None + ignoreQueryParameters: Optional[bool] = None + regexOnFullURL: Optional[bool] = None + +class CrawlResponse(pydantic.BaseModel): + """Response from crawling operations.""" + id: Optional[str] = None + url: Optional[str] = None + success: bool = True + error: Optional[str] = None + +class CrawlStatusResponse(pydantic.BaseModel): + """Response from crawl status checks.""" + success: bool = True + status: Literal["scraping", "completed", "failed", "cancelled"] + completed: int + total: int + creditsUsed: int + expiresAt: datetime + next: Optional[str] = None + data: List[FirecrawlDocument] + +class CrawlErrorsResponse(pydantic.BaseModel): + """Response from crawl/batch scrape error monitoring.""" + errors: List[Dict[str, str]] # {id: str, timestamp: str, url: str, error: str} + robotsBlocked: List[str] + +class MapParams(pydantic.BaseModel): + """Parameters for mapping operations.""" + search: 
Optional[str] = None + ignoreSitemap: Optional[bool] = None + includeSubdomains: Optional[bool] = None + sitemapOnly: Optional[bool] = None + limit: Optional[int] = None + timeout: Optional[int] = None + +class MapResponse(pydantic.BaseModel): + """Response from mapping operations.""" + success: bool = True + links: Optional[List[str]] = None + error: Optional[str] = None + +class ExtractParams(pydantic.BaseModel): + """Parameters for extracting information from URLs.""" + prompt: Optional[str] = None + schema: Optional[Any] = None + systemPrompt: Optional[str] = None + allowExternalLinks: Optional[bool] = None + enableWebSearch: Optional[bool] = None + includeSubdomains: Optional[bool] = None + origin: Optional[str] = None + showSources: Optional[bool] = None + scrapeOptions: Optional[CrawlScrapeOptions] = None + +class ExtractResponse(pydantic.BaseModel, Generic[T]): + """Response from extract operations.""" + success: bool = True + data: Optional[T] = None + error: Optional[str] = None + warning: Optional[str] = None + sources: Optional[List[str]] = None + class SearchParams(pydantic.BaseModel): query: str limit: Optional[int] = 5 @@ -33,6 +240,13 @@ class SearchParams(pydantic.BaseModel): timeout: Optional[int] = 60000 scrapeOptions: Optional[Dict[str, Any]] = None +class SearchResponse(pydantic.BaseModel): + """Response from search operations.""" + success: bool = True + data: List[FirecrawlDocument] + warning: Optional[str] = None + error: Optional[str] = None + class GenerateLLMsTextParams(pydantic.BaseModel): """ Parameters for the LLMs.txt generation operation. @@ -73,40 +287,21 @@ class DeepResearchStatusResponse(pydantic.BaseModel): sources: List[Dict[str, Any]] summaries: List[str] +class GenerateLLMsTextResponse(pydantic.BaseModel): + """Response from LLMs.txt generation operations.""" + success: bool = True + id: str + error: Optional[str] = None + +class GenerateLLMsTextStatusResponse(pydantic.BaseModel): + """Status response from LLMs.txt generation operations.""" + success: bool = True + data: Optional[Dict[str, str]] = None # {llmstxt: str, llmsfulltxt?: str} + status: Literal["processing", "completed", "failed"] + error: Optional[str] = None + expiresAt: str + class FirecrawlApp: - class SearchResponse(pydantic.BaseModel): - """ - Response from the search operation. - """ - success: bool - data: List[Dict[str, Any]] - warning: Optional[str] = None - error: Optional[str] = None - - class ExtractParams(pydantic.BaseModel): - """ - Parameters for the extract operation. - """ - prompt: Optional[str] = None - schema_: Optional[Any] = pydantic.Field(None, alias='schema') - system_prompt: Optional[str] = None - allow_external_links: Optional[bool] = False - enable_web_search: Optional[bool] = False - # Just for backwards compatibility - enableWebSearch: Optional[bool] = False - show_sources: Optional[bool] = False - - - - - class ExtractResponse(pydantic.BaseModel): - """ - Response from the extract operation. - """ - success: bool - data: Optional[Any] = None - error: Optional[str] = None - def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: """ Initialize the FirecrawlApp instance with API key, API URL. 
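Editor's note: to illustrate how the new module-level models introduced above compose, here is a minimal, hedged sketch. The values and webhook endpoint are illustrative, and the SDK methods in this patch still accept plain dicts, so a model instance would need to be dumped to a dict with pydantic before being passed to them.

```python
from firecrawl.firecrawl import CrawlParams, CrawlScrapeOptions, WebhookConfig

# Build a typed crawl configuration instead of a hand-written dict.
# Field names mirror the CrawlParams / CrawlScrapeOptions / WebhookConfig
# models defined in this patch; all values below are placeholders.
crawl_config = CrawlParams(
    includePaths=["docs/*"],
    limit=100,
    scrapeOptions=CrawlScrapeOptions(formats=["markdown", "html"], onlyMainContent=True),
    webhook=WebhookConfig(
        url="https://example.com/hooks/firecrawl",  # placeholder endpoint
        events=["completed", "failed"],
    ),
)

print(crawl_config.limit, crawl_config.scrapeOptions.formats)
```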
@@ -125,19 +320,42 @@ class FirecrawlApp: logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}") - def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> ScrapeResponse[Any]: """ - Scrape the specified URL using the Firecrawl API. + Scrape and extract content from a URL. Args: - url (str): The URL to scrape. - params (Optional[Dict[str, Any]]): Additional parameters for the scrape request. + url (str): Target URL to scrape + + params (Optional[Dict[str, Any]]): See ScrapeParams model for configuration: + + Content Options: + * formats - Content types to retrieve (markdown/html/etc) + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type (basic/stealth) + + Extraction Options: + * extract - Content extraction settings + * jsonOptions - JSON extraction settings + * actions - Actions to perform Returns: - Any: The scraped data if the request is successful. + ScrapeResponse with: + + * Requested content formats + * Page metadata + * Extraction results + * Success/error status Raises: - Exception: If the scrape request fails. + Exception: If scraping fails """ headers = self._prepare_headers() @@ -193,16 +411,35 @@ class FirecrawlApp: else: self._handle_error(response, 'scrape URL') - def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> Dict[str, Any]: + def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: """ - Search for content using the Firecrawl API. + Search for content using Firecrawl. Args: - query (str): The search query string. - params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters. + query (str): Search query string + + params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model: + + Search Options: + * limit - Max results (default: 5) + * tbs - Time filter (e.g. "qdr:d") + * filter - Custom result filter + + Localization: + * lang - Language code (default: "en") + * country - Country code (default: "us") + * location - Geo-targeting + + Request Options: + * timeout - Request timeout (ms) + * scrapeOptions - Result scraping config, check ScrapeParams model for more details Returns: - Dict[str, Any]: The search response containing success status and search results. + SearchResponse + + + Raises: + Exception: If search fails """ if params is None: params = {} @@ -230,28 +467,46 @@ class FirecrawlApp: def crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, poll_interval: Optional[int] = 2, - idempotency_key: Optional[str] = None) -> Any: + idempotency_key: Optional[str] = None) -> CrawlStatusResponse: """ - Initiate a crawl job for the specified URL using the Firecrawl API. + Crawl a website starting from a URL. Args: - url (str): The URL to crawl. - params (Optional[Dict[str, Any]]): Additional parameters for the crawl request. - poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. 
+ url (str): Target URL to start crawling from + + params (Optional[Dict[str, Any]]): See CrawlParams model for configuration: + + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + + poll_interval: Seconds between status checks (default: 2) + + idempotency_key: Request deduplication key Returns: - Dict[str, Any]: A dictionary containing the crawl results. The structure includes: - - 'success' (bool): Indicates if the crawl was successful. - - 'status' (str): The final status of the crawl job (e.g., 'completed'). - - 'completed' (int): Number of scraped pages that completed. - - 'total' (int): Total number of scraped pages. - - 'creditsUsed' (int): Estimated number of API credits used for this crawl. - - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the crawl data expires. - - 'data' (List[Dict]): List of all the scraped pages. + CrawlStatusResponse with: + * Crawling status and progress + * Crawled page contents + * Success/error information Raises: - Exception: If the crawl job initiation or monitoring fails. + Exception: If crawl fails """ endpoint = f'/v1/crawl' headers = self._prepare_headers(idempotency_key) @@ -270,20 +525,45 @@ class FirecrawlApp: self._handle_error(response, 'start crawl job') - def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]: + def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> CrawlResponse: """ - Initiate a crawl job asynchronously. + Start an asynchronous crawl job. Args: - url (str): The URL to crawl. - params (Optional[Dict[str, Any]]): Additional parameters for the crawl request. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. + url (str): Target URL to start crawling from + + params (Optional[Dict[str, Any]]): See CrawlParams model: + + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + + idempotency_key: Unique key to prevent duplicate requests Returns: - Dict[str, Any]: A dictionary containing the crawl initiation response. The structure includes: - - 'success' (bool): Indicates if the crawl initiation was successful. - - 'id' (str): The unique identifier for the crawl job. 
- - 'url' (str): The URL to check the status of the crawl job. + CrawlResponse with: + * success - Whether crawl started successfully + * id - Unique identifier for the crawl job + * url - Status check URL for the crawl + * error - Error message if start failed + + Raises: + Exception: If crawl initiation fails """ endpoint = f'/v1/crawl' headers = self._prepare_headers(idempotency_key) @@ -299,18 +579,31 @@ class FirecrawlApp: else: self._handle_error(response, 'start crawl job') - def check_crawl_status(self, id: str) -> Any: + def check_crawl_status(self, id: str) -> CrawlStatusResponse: """ - Check the status of a crawl job using the Firecrawl API. + Check the status and results of a crawl job. Args: - id (str): The ID of the crawl job. + id: Unique identifier for the crawl job Returns: - Any: The status of the crawl job. + CrawlStatusResponse containing: + + Status Information: + * status - Current state (scraping/completed/failed/cancelled) + * completed - Number of pages crawled + * total - Total pages to crawl + * creditsUsed - API credits consumed + * expiresAt - Data expiration timestamp + + Results: + * data - List of crawled documents + * next - URL for next page of results (if paginated) + * success - Whether status check succeeded + * error - Error message if failed Raises: - Exception: If the status check request fails. + Exception: If status check fails """ endpoint = f'/v1/crawl/{id}' @@ -369,7 +662,7 @@ class FirecrawlApp: else: self._handle_error(response, 'check crawl status') - def check_crawl_errors(self, id: str) -> Dict[str, Any]: + def check_crawl_errors(self, id: str) -> CrawlErrorsResponse: """ Returns information about crawl errors. @@ -427,16 +720,32 @@ class FirecrawlApp: else: raise Exception("Crawl job failed to start") - def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: + def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> MapResponse: """ - Perform a map search using the Firecrawl API. + Map and discover links from a URL. Args: - url (str): The URL to perform the map search on. - params (Optional[Dict[str, Any]]): Additional parameters for the map search. + url: Target URL to map + + params: See MapParams model: + + Discovery Options: + * search - Filter pattern for URLs + * ignoreSitemap - Skip sitemap.xml + * includeSubdomains - Include subdomain links + * sitemapOnly - Only use sitemap.xml + + Limits: + * limit - Max URLs to return + * timeout - Request timeout (ms) Returns: - List[str]: A list of URLs discovered during the map search. + MapResponse with: + * Discovered URLs + * Success/error status + + Raises: + Exception: If mapping fails """ endpoint = f'/v1/map' headers = self._prepare_headers() @@ -469,28 +778,44 @@ class FirecrawlApp: def batch_scrape_urls(self, urls: List[str], params: Optional[Dict[str, Any]] = None, poll_interval: Optional[int] = 2, - idempotency_key: Optional[str] = None) -> Any: + idempotency_key: Optional[str] = None) -> BatchScrapeStatusResponse: """ - Initiate a batch scrape job for the specified URLs using the Firecrawl API. + Batch scrape multiple URLs and monitor until completion. Args: - urls (List[str]): The URLs to scrape. - params (Optional[Dict[str, Any]]): Additional parameters for the scraper. - poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. 
+ urls: URLs to scrape + + params: See ScrapeParams model: + + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + + poll_interval: Seconds between status checks (default: 2) + + idempotency_key: Request deduplication key Returns: - Dict[str, Any]: A dictionary containing the scrape results. The structure includes: - - 'success' (bool): Indicates if the batch scrape was successful. - - 'status' (str): The final status of the batch scrape job (e.g., 'completed'). - - 'completed' (int): Number of scraped pages that completed. - - 'total' (int): Total number of scraped pages. - - 'creditsUsed' (int): Estimated number of API credits used for this batch scrape. - - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the batch scrape data expires. - - 'data' (List[Dict]): List of all the scraped pages. + BatchScrapeStatusResponse with: + * Scraping status and progress + * Scraped content for each URL + * Success/error information Raises: - Exception: If the batch scrape job initiation or monitoring fails. + Exception: If batch scrape fails """ endpoint = f'/v1/batch/scrape' headers = self._prepare_headers(idempotency_key) @@ -509,9 +834,13 @@ class FirecrawlApp: self._handle_error(response, 'start batch scrape job') - def async_batch_scrape_urls(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]: + def async_batch_scrape_urls( + self, + urls: List[str], + params: Optional[Dict[str, Any]] = None, + idempotency_key: Optional[str] = None) -> BatchScrapeResponse: """ - Initiate a crawl job asynchronously. + Initiate a batch scrape job asynchronously. Args: urls (List[str]): The URLs to scrape. @@ -519,7 +848,7 @@ class FirecrawlApp: idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. Returns: - Dict[str, Any]: A dictionary containing the batch scrape initiation response. The structure includes: + BatchScrapeResponse: A dictionary containing the batch scrape initiation response. The structure includes: - 'success' (bool): Indicates if the batch scrape initiation was successful. - 'id' (str): The unique identifier for the batch scrape job. - 'url' (str): The URL to check the status of the batch scrape job. @@ -538,13 +867,17 @@ class FirecrawlApp: else: self._handle_error(response, 'start batch scrape job') - def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher': + def batch_scrape_urls_and_watch( + self, + urls: List[str], + params: Optional[ScrapeParams] = None, + idempotency_key: Optional[str] = None) -> 'CrawlWatcher': """ Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket. Args: urls (List[str]): The URLs to scrape. - params (Optional[Dict[str, Any]]): Additional parameters for the scraper. + params (Optional[ScrapeParams]): Additional parameters for the scraper. idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. 
Returns: @@ -556,7 +889,7 @@ class FirecrawlApp: else: raise Exception("Batch scrape job failed to start") - def check_batch_scrape_status(self, id: str) -> Any: + def check_batch_scrape_status(self, id: str) -> BatchScrapeStatusResponse: """ Check the status of a batch scrape job using the Firecrawl API. @@ -564,7 +897,7 @@ class FirecrawlApp: id (str): The ID of the batch scrape job. Returns: - Any: The status of the batch scrape job. + BatchScrapeStatusResponse: The status of the batch scrape job. Raises: Exception: If the status check request fails. @@ -626,7 +959,7 @@ class FirecrawlApp: else: self._handle_error(response, 'check batch scrape status') - def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]: + def check_batch_scrape_errors(self, id: str) -> CrawlErrorsResponse: """ Returns information about batch scrape errors. @@ -634,7 +967,13 @@ class FirecrawlApp: id (str): The ID of the crawl job. Returns: - Dict[str, Any]: Information about crawl errors. + CrawlErrorsResponse: A response containing: + - errors (List[Dict[str, str]]): List of errors with fields: + - id (str): Error ID + - timestamp (str): When the error occurred + - url (str): URL that caused the error + - error (str): Error message + - robotsBlocked (List[str]): List of URLs blocked by robots.txt """ headers = self._prepare_headers() response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers) @@ -646,16 +985,40 @@ class FirecrawlApp: else: self._handle_error(response, "check batch scrape errors") - def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any: + def extract( + self, + urls: List[str], + params: Optional[ExtractParams] = None) -> ExtractResponse[Any]: """ - Extracts information from a URL using the Firecrawl API. + Extract structured information from URLs. Args: - urls (List[str]): The URLs to extract information from. - params (Optional[ExtractParams]): Additional parameters for the extract request. + urls: URLs to extract from + + params: See ExtractParams model: + + Extraction Config: + * prompt - Custom extraction prompt + * schema - JSON schema/Pydantic model + * systemPrompt - System context + + Behavior Options: + * allowExternalLinks - Follow external links + * enableWebSearch - Enable web search + * includeSubdomains - Include subdomains + * showSources - Include source URLs + + Scraping Options: + * scrapeOptions - Page scraping config Returns: - Union[ExtractResponse, ErrorResponse]: The response from the extract operation. + ExtractResponse with: + * Structured data matching schema + * Source information if requested + * Success/error status + + Raises: + ValueError: If prompt/schema missing or extraction fails """ headers = self._prepare_headers() @@ -715,10 +1078,7 @@ class FirecrawlApp: except: raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': - if status_data['success']: - return status_data - else: - raise Exception(f'Failed to extract. Error: {status_data["error"]}') + return status_data elif status_data['status'] in ['failed', 'cancelled']: raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}') else: @@ -734,7 +1094,7 @@ class FirecrawlApp: return {'success': False, 'error': "Internal server error."} - def get_extract_status(self, job_id: str) -> Dict[str, Any]: + def get_extract_status(self, job_id: str) -> ExtractResponse[Any]: """ Retrieve the status of an extract job. 
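Editor's note: for reference, a short sketch of driving `extract` with a structured schema, in the spirit of the example scripts added later in this patch. The `PageSummary` model, API key, and URL are placeholders, and pydantic v2's `model_json_schema()` is assumed, as in those examples.

```python
from typing import List
from pydantic import BaseModel
from firecrawl.firecrawl import FirecrawlApp

class PageSummary(BaseModel):  # hypothetical schema, for illustration only
    title: str
    description: str
    links: List[str]

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# extract() polls the job until completion and returns the structured result.
result = app.extract(
    ["https://firecrawl.dev"],
    {
        "prompt": "Extract the title, description, and links from the website",
        "schema": PageSummary.model_json_schema(),
    },
)
print(result)
```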
@@ -742,7 +1102,7 @@ class FirecrawlApp: job_id (str): The ID of the extract job. Returns: - Dict[str, Any]: The status of the extract job. + ExtractResponse[Any]: The status of the extract job. Raises: ValueError: If there is an error retrieving the status. @@ -760,20 +1120,32 @@ class FirecrawlApp: except Exception as e: raise ValueError(str(e), 500) - def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]: + def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: """ Initiate an asynchronous extract job. Args: - urls (List[str]): The URLs to extract data from. - params (Optional[Dict[str, Any]]): Additional parameters for the extract request. - idempotency_key (Optional[str]): A unique key to ensure idempotency of requests. + urls (List[str]): List of URLs to extract information from. Must be valid HTTP/HTTPS URLs. + params (Optional[Dict[str, Any]]): Extraction configuration parameters: + - prompt (str, optional): Custom prompt for extraction + - schema (Any, optional): JSON schema or Pydantic model for structured extraction + - systemPrompt (str, optional): System prompt for extraction + - allowExternalLinks (bool, optional): Allow following external links + - enableWebSearch (bool, optional): Enable web search during extraction + - includeSubdomains (bool, optional): Include content from subdomains + - origin (str, optional): Source of the extraction request + - showSources (bool, optional): Include source URLs in response + - scrapeOptions (CrawlScrapeOptions, optional): Configuration for scraping pages + idempotency_key (Optional[str]): Unique identifier to prevent duplicate requests. Returns: - Dict[str, Any]: The response from the extract operation. + ExtractResponse[Any]: A response containing: + - success (bool): Whether the extraction initiation was successful + - id (str): The unique identifier for the extract job + - error (str, optional): Error message if initiation failed Raises: - ValueError: If there is an error initiating the extract job. + ValueError: If neither prompt nor schema is provided, or if there is an error during initiation. """ headers = self._prepare_headers(idempotency_key) @@ -804,24 +1176,32 @@ class FirecrawlApp: except Exception as e: raise ValueError(str(e), 500) - def generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> Dict[str, Any]: + def generate_llms_text( + self, + url: str, + params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: """ Generate LLMs.txt for a given URL and poll until completion. Args: - url (str): The URL to generate LLMs.txt from. - params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Parameters for the LLMs.txt generation. + url: Target URL to generate LLMs.txt from + + params: See GenerateLLMsTextParams model: + + Generation Options: + * maxUrls - Maximum URLs to process (default: 10) + * showFullText - Include full text in output (default: False) + * __experimental_stream - Enable streaming of generation progress Returns: - Dict[str, Any]: A dictionary containing the generation results. The structure includes: - - 'success' (bool): Indicates if the generation was successful. - - 'status' (str): The final status of the generation job. - - 'data' (Dict): The generated LLMs.txt data. 
- - 'error' (Optional[str]): Error message if the generation failed. - - 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the data expires. + GenerateLLMsTextStatusResponse with: + * Generated LLMs.txt content + * Full version if requested + * Generation status + * Success/error information Raises: - Exception: If the generation job fails or an error occurs during status checks. + Exception: If generation fails """ if params is None: params = {} @@ -850,18 +1230,25 @@ class FirecrawlApp: return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'} - def async_generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> Dict[str, Any]: + def async_generate_llms_text( + self, + url: str, + params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse: """ Initiate an asynchronous LLMs.txt generation operation. Args: - url (str): The URL to generate LLMs.txt from. - params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Parameters for the LLMs.txt generation. + url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL. + params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Generation configuration parameters: + - maxUrls (int, optional): Maximum number of URLs to process (default: 10) + - showFullText (bool, optional): Include full text in output (default: False) + - __experimental_stream (bool, optional): Enable streaming of generation progress Returns: - Dict[str, Any]: A dictionary containing the generation initiation response. The structure includes: - - 'success' (bool): Indicates if the generation initiation was successful. - - 'id' (str): The unique identifier for the generation job. + GenerateLLMsTextResponse: A response containing: + - success (bool): Whether the generation initiation was successful + - id (str): The unique identifier for the generation job + - error (str, optional): Error message if initiation failed Raises: Exception: If the generation job initiation fails. @@ -891,15 +1278,22 @@ class FirecrawlApp: return {'success': False, 'error': 'Internal server error'} - def check_generate_llms_text_status(self, id: str) -> Dict[str, Any]: + def check_generate_llms_text_status(self, id: str) -> GenerateLLMsTextStatusResponse: """ Check the status of a LLMs.txt generation operation. Args: - id (str): The ID of the LLMs.txt generation operation. + id (str): The unique identifier of the LLMs.txt generation job to check status for. Returns: - Dict[str, Any]: The current status and results of the generation operation. + GenerateLLMsTextStatusResponse: A response containing: + - success (bool): Whether the generation was successful + - status (str): Status of generation ("processing", "completed", "failed") + - data (Dict[str, str], optional): Generated text with fields: + - llmstxt (str): Generated LLMs.txt content + - llmsfulltxt (str, optional): Full version if requested + - error (str, optional): Error message if generation failed + - expiresAt (str): When the generated data expires Raises: Exception: If the status check fails. @@ -921,7 +1315,9 @@ class FirecrawlApp: return {'success': False, 'error': 'Internal server error'} - def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]: + def _prepare_headers( + self, + idempotency_key: Optional[str] = None) -> Dict[str, str]: """ Prepare the headers for API requests. 
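Editor's note: a hedged usage sketch for the LLMs.txt helpers documented above. The API key and URL are placeholders; the `GenerateLLMsTextParams` field names and the dict-shaped status payload follow the docstrings and the implementation shown here, not a guaranteed public contract.

```python
from firecrawl.firecrawl import FirecrawlApp, GenerateLLMsTextParams

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

params = GenerateLLMsTextParams(maxUrls=5, showFullText=True)
status = app.generate_llms_text("https://docs.firecrawl.dev", params)

# The helper polls until the job finishes; the payload mirrors
# GenerateLLMsTextStatusResponse (llmstxt, plus llmsfulltxt when showFullText is set).
if status.get("success"):
    print(status["data"]["llmstxt"])
else:
    print("LLMs.txt generation failed:", status.get("error"))
```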
@@ -943,11 +1339,13 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}', } - def _post_request(self, url: str, - data: Dict[str, Any], - headers: Dict[str, str], - retries: int = 3, - backoff_factor: float = 0.5) -> requests.Response: + def _post_request( + self, + url: str, + data: Dict[str, Any], + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> requests.Response: """ Make a POST request with retries. @@ -972,10 +1370,12 @@ class FirecrawlApp: return response return response - def _get_request(self, url: str, - headers: Dict[str, str], - retries: int = 3, - backoff_factor: float = 0.5) -> requests.Response: + def _get_request( + self, + url: str, + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> requests.Response: """ Make a GET request with retries. @@ -999,10 +1399,12 @@ class FirecrawlApp: return response return response - def _delete_request(self, url: str, - headers: Dict[str, str], - retries: int = 3, - backoff_factor: float = 0.5) -> requests.Response: + def _delete_request( + self, + url: str, + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> requests.Response: """ Make a DELETE request with retries. @@ -1026,16 +1428,21 @@ class FirecrawlApp: return response return response - def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any: + def _monitor_job_status( + self, + id: str, + headers: Dict[str, str], + poll_interval: int) -> CrawlStatusResponse: """ Monitor the status of a crawl job until completion. Args: id (str): The ID of the crawl job. headers (Dict[str, str]): The headers to include in the status check requests. - poll_interval (int): Secounds between status checks. + poll_interval (int): Seconds between status checks. + Returns: - Any: The crawl results if the job is completed successfully. + CrawlStatusResponse: The crawl results if the job is completed successfully. Raises: Exception: If the job fails or an error occurs during status checks. @@ -1073,7 +1480,10 @@ class FirecrawlApp: else: self._handle_error(status_response, 'check crawl status') - def _handle_error(self, response: requests.Response, action: str) -> None: + def _handle_error( + self, + response: requests.Response, + action: str) -> None: """ Handle errors from API responses. @@ -1105,22 +1515,47 @@ class FirecrawlApp: # Raise an HTTPError with the custom message and attach the response raise requests.exceptions.HTTPError(message, response=response) - def deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None, - on_activity: Optional[Callable[[Dict[str, Any]], None]] = None, - on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> Dict[str, Any]: + def deep_research( + self, + query: str, + params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None, + on_activity: Optional[Callable[[Dict[str, Any]], None]] = None, + on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse: """ Initiates a deep research operation on a given query and polls until completion. Args: - query (str): The query to research. - params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Parameters for the deep research operation. - on_activity (Optional[Callable[[Dict[str, Any]], None]]): Optional callback to receive activity updates in real-time. 
+ query: Research query or topic to investigate + + params: See DeepResearchParams model: + Research Settings: + * maxDepth - Maximum research depth (default: 7) + * timeLimit - Time limit in seconds (default: 270) + * maxUrls - Maximum URLs to process (default: 20) + + Callbacks: + * on_activity - Progress callback receiving: + {type, status, message, timestamp, depth} + * on_source - Source discovery callback receiving: + {url, title, description} Returns: - Dict[str, Any]: The final research results. + DeepResearchResponse containing: + + Status: + * success - Whether research completed successfully + * status - Current state (processing/completed/failed) + * error - Error message if failed + + Results: + * id - Unique identifier for the research job + * data - Research findings and analysis + * sources - List of discovered sources + * activities - Research progress log + * summaries - Generated research summaries Raises: - Exception: If the research operation fails. + Exception: If research fails """ if params is None: params = {} @@ -1164,16 +1599,26 @@ class FirecrawlApp: return {'success': False, 'error': 'Deep research job terminated unexpectedly'} - def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> Dict[str, Any]: + def async_deep_research( + self, + query: str, + params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse: """ Initiates an asynchronous deep research operation. Args: - query (str): The query to research. - params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Parameters for the deep research operation. + query (str): The research query to investigate. Should be a clear, specific question or topic. + params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Research configuration parameters: + - maxDepth (int, optional): Maximum depth of research exploration (default: 7) + - timeLimit (int, optional): Time limit in seconds for research (default: 270) + - maxUrls (int, optional): Maximum number of URLs to process (default: 20) + - __experimental_streamSteps (bool, optional): Enable streaming of research steps Returns: - Dict[str, Any]: The response from the deep research initiation. + DeepResearchResponse: A response containing: + - success (bool): Whether the research initiation was successful + - id (str): The unique identifier for the research job + - error (str, optional): Error message if initiation failed Raises: Exception: If the research initiation fails. @@ -1203,7 +1648,7 @@ class FirecrawlApp: return {'success': False, 'error': 'Internal server error'} - def check_deep_research_status(self, id: str) -> Dict[str, Any]: + def check_deep_research_status(self, id: str) -> DeepResearchStatusResponse: """ Check the status of a deep research operation. @@ -1211,7 +1656,19 @@ class FirecrawlApp: id (str): The ID of the deep research operation. Returns: - Dict[str, Any]: The current status and results of the research operation. + DeepResearchResponse containing: + + Status: + * success - Whether research completed successfully + * status - Current state (processing/completed/failed) + * error - Error message if failed + + Results: + * id - Unique identifier for the research job + * data - Research findings and analysis + * sources - List of discovered sources + * activities - Research progress log + * summaries - Generated research summaries Raises: Exception: If the status check fails. 
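Editor's note: a brief, hedged sketch of `deep_research` with the activity callback described above. The query, limits, and dict-style access to the result are illustrative assumptions based on the docstrings and the polling implementation in this patch.

```python
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

def on_activity(activity):
    # Progress events carry {type, status, message, timestamp, depth}.
    print(f"[{activity['type']}] {activity['message']}")

research = app.deep_research(
    "How do open-source scraping frameworks handle JavaScript-heavy sites?",  # illustrative query
    params={"maxDepth": 3, "timeLimit": 120, "maxUrls": 10},
    on_activity=on_activity,
)

print(research.get("status"), len(research.get("sources", [])), "sources")
```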
@@ -1232,8 +1689,18 @@ class FirecrawlApp: raise ValueError(str(e)) return {'success': False, 'error': 'Internal server error'} - class CrawlWatcher: + """ + A class to watch and handle crawl job events via WebSocket connection. + + Attributes: + id (str): The ID of the crawl job to watch + app (FirecrawlApp): The FirecrawlApp instance + data (List[Dict[str, Any]]): List of crawled documents/data + status (str): Current status of the crawl job + ws_url (str): WebSocket URL for the crawl job + event_handlers (dict): Dictionary of event type to list of handler functions + """ def __init__(self, id: str, app: FirecrawlApp): self.id = id self.app = app @@ -1246,25 +1713,54 @@ class CrawlWatcher: 'document': [] } - async def connect(self): + async def connect(self) -> None: + """ + Establishes WebSocket connection and starts listening for messages. + """ async with websockets.connect(self.ws_url, extra_headers={"Authorization": f"Bearer {self.app.api_key}"}) as websocket: await self._listen(websocket) - async def _listen(self, websocket): + async def _listen(self, websocket) -> None: + """ + Listens for incoming WebSocket messages and handles them. + + Args: + websocket: The WebSocket connection object + """ async for message in websocket: msg = json.loads(message) await self._handle_message(msg) - def add_event_listener(self, event_type: str, handler): + def add_event_listener(self, event_type: str, handler: Callable[[Dict[str, Any]], None]) -> None: + """ + Adds an event handler function for a specific event type. + + Args: + event_type (str): Type of event to listen for ('done', 'error', or 'document') + handler (Callable): Function to handle the event + """ if event_type in self.event_handlers: self.event_handlers[event_type].append(handler) - def dispatch_event(self, event_type: str, detail: Dict[str, Any]): + def dispatch_event(self, event_type: str, detail: Dict[str, Any]) -> None: + """ + Dispatches an event to all registered handlers for that event type. + + Args: + event_type (str): Type of event to dispatch + detail (Dict[str, Any]): Event details/data to pass to handlers + """ if event_type in self.event_handlers: for handler in self.event_handlers[event_type]: handler(detail) - async def _handle_message(self, msg: Dict[str, Any]): + async def _handle_message(self, msg: Dict[str, Any]) -> None: + """ + Handles incoming WebSocket messages based on their type. 
+ + Args: + msg (Dict[str, Any]): The message to handle + """ if msg['type'] == 'done': self.status = 'completed' self.dispatch_event('done', {'status': self.status, 'data': self.data, 'id': self.id}) From 3641070ece62d26a007f86385a4fe2aecfd96b16 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 13 Mar 2025 16:27:59 -0300 Subject: [PATCH 02/26] async --- apps/python-sdk/example.py | 4 +- apps/python-sdk/example_async.py | 168 +++ apps/python-sdk/firecrawl/firecrawl.py | 1770 +++++++++++++++++++++--- apps/python-sdk/requirements.txt | 3 +- 4 files changed, 1762 insertions(+), 183 deletions(-) create mode 100644 apps/python-sdk/example_async.py diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index fb960187..ae4258f7 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -47,7 +47,7 @@ while attempts > 0 and crawl_status['status'] != 'completed': attempts -= 1 time.sleep(1) -crawl_status = app.get_crawl_status(async_result['id']) +crawl_status = app.check_crawl_status(async_result['id']) print(crawl_status) # LLM Extraction: @@ -155,4 +155,4 @@ async def start_crawl_and_watch(): watcher.add_event_listener("done", on_done) # Start the watcher - await watcher.connect() + await watcher.connect() \ No newline at end of file diff --git a/apps/python-sdk/example_async.py b/apps/python-sdk/example_async.py new file mode 100644 index 00000000..7afe6a70 --- /dev/null +++ b/apps/python-sdk/example_async.py @@ -0,0 +1,168 @@ +import time +import nest_asyncio +import uuid +import asyncio +from firecrawl.firecrawl import AsyncFirecrawlApp +from pydantic import BaseModel, Field +from typing import List + +app = AsyncFirecrawlApp(api_key="fc-") + +async def example_scrape(): + # Scrape a website: + scrape_result = await app.scrape_url('firecrawl.dev') + print(scrape_result['markdown']) + +async def example_batch_scrape(): + # Batch scrape + urls = ['https://example.com', 'https://docs.firecrawl.dev'] + batch_scrape_params = { + 'formats': ['markdown', 'html'], + } + + # Synchronous batch scrape + batch_result = await app.batch_scrape_urls(urls, batch_scrape_params) + print("Synchronous Batch Scrape Result:") + print(batch_result['data'][0]['markdown']) + + # Asynchronous batch scrape + async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params) + print("\nAsynchronous Batch Scrape Result:") + print(async_batch_result) + +async def example_crawl(): + # Crawl a website: + idempotency_key = str(uuid.uuid4()) # optional idempotency key + crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key) + print(crawl_result) + + # Asynchronous Crawl a website: + async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "") + print(async_result) + + crawl_status = await app.check_crawl_status(async_result['id']) + print(crawl_status) + + attempts = 15 + while attempts > 0 and crawl_status['status'] != 'completed': + print(crawl_status) + crawl_status = await app.check_crawl_status(async_result['id']) + attempts -= 1 + await asyncio.sleep(1) # Use async sleep instead of time.sleep + + crawl_status = await app.check_crawl_status(async_result['id']) + print(crawl_status) + +async def example_llm_extraction(): + # Define schema to extract contents into using pydantic + class ArticleSchema(BaseModel): + title: str + points: int + by: str + commentsURL: str + + class TopArticlesSchema(BaseModel): + top: List[ArticleSchema] = 
Field(..., description="Top 5 stories") + + llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', { + 'formats': ['extract'], + 'extract': { + 'schema': TopArticlesSchema.model_json_schema() + } + }) + + print(llm_extraction_result['extract']) + + # Define schema to extract contents into using json schema + json_schema = { + "type": "object", + "properties": { + "top": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": {"type": "string"}, + "points": {"type": "number"}, + "by": {"type": "string"}, + "commentsURL": {"type": "string"} + }, + "required": ["title", "points", "by", "commentsURL"] + }, + "minItems": 5, + "maxItems": 5, + "description": "Top 5 stories on Hacker News" + } + }, + "required": ["top"] + } + + app2 = AsyncFirecrawlApp(api_key="fc-", version="v0") + + llm_extraction_result = await app2.scrape_url('https://news.ycombinator.com', { + 'extractorOptions': { + 'extractionSchema': json_schema, + 'mode': 'llm-extraction' + }, + 'pageOptions':{ + 'onlyMainContent': True + } + }) + +async def example_map_and_extract(): + # Map a website: + map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' }) + print(map_result) + + # Extract URLs: + class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + + # Define the schema using Pydantic + extract_schema = ExtractSchema.schema() + + # Perform the extraction + extract_result = await app.extract(['https://firecrawl.dev'], { + 'prompt': "Extract the title, description, and links from the website", + 'schema': extract_schema + }) + print(extract_result) + +# Define event handlers for websocket +def on_document(detail): + print("DOC", detail) + +def on_error(detail): + print("ERR", detail['error']) + +def on_done(detail): + print("DONE", detail['status']) + +async def example_websocket_crawl(): + # Initiate the crawl job and get the watcher + watcher = await app.crawl_url_and_watch('firecrawl.dev', { 'excludePaths': ['blog/*'], 'limit': 5 }) + + # Add event listeners + watcher.add_event_listener("document", on_document) + watcher.add_event_listener("error", on_error) + watcher.add_event_listener("done", on_done) + + # Start the watcher + await watcher.connect() + +async def main(): + # Apply nest_asyncio to allow nested event loops + nest_asyncio.apply() + + # Run all the examples + await example_scrape() + await example_batch_scrape() + await example_crawl() + await example_llm_extraction() + await example_map_and_extract() + await example_websocket_crawl() + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d212dea7..e0f8c940 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -19,6 +19,8 @@ from datetime import datetime import requests import pydantic import websockets +import aiohttp +import asyncio logger : logging.Logger = logging.getLogger("firecrawl") @@ -326,21 +328,19 @@ class FirecrawlApp: Args: url (str): Target URL to scrape - params (Optional[Dict[str, Any]]): See ScrapeParams model for configuration: - Content Options: * formats - Content types to retrieve (markdown/html/etc) * includeTags - HTML tags to include * excludeTags - HTML tags to exclude * onlyMainContent - Extract main content only - + Request Options: * headers - Custom HTTP headers * timeout - Request timeout (ms) * mobile - Use mobile user agent * proxy - Proxy type (basic/stealth) - + Extraction Options: * extract 
- Content extraction settings * jsonOptions - JSON extraction settings @@ -348,7 +348,6 @@ class FirecrawlApp: Returns: ScrapeResponse with: - * Requested content formats * Page metadata * Extraction results @@ -465,7 +464,7 @@ class FirecrawlApp: raise Exception(f'Failed to parse Firecrawl response as JSON.') def crawl_url(self, url: str, - params: Optional[Dict[str, Any]] = None, + params: Optional[CrawlParams] = None, poll_interval: Optional[int] = 2, idempotency_key: Optional[str] = None) -> CrawlStatusResponse: """ @@ -473,9 +472,7 @@ class FirecrawlApp: Args: url (str): Target URL to start crawling from - - params (Optional[Dict[str, Any]]): See CrawlParams model for configuration: - + params (Optional[CrawlParams]): See CrawlParams model: URL Discovery: * includePaths - Patterns of URLs to include * excludePaths - Patterns of URLs to exclude @@ -494,10 +491,8 @@ class FirecrawlApp: * deduplicateSimilarURLs - Remove similar URLs * ignoreQueryParameters - Ignore URL parameters * regexOnFullURL - Apply regex to full URLs - - poll_interval: Seconds between status checks (default: 2) - - idempotency_key: Request deduplication key + poll_interval (int): Seconds between status checks (default: 2) + idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: CrawlStatusResponse with: @@ -667,10 +662,19 @@ class FirecrawlApp: Returns information about crawl errors. Args: - id (str): The ID of the crawl job. + id (str): The ID of the crawl job Returns: - Dict[str, Any]: Information about crawl errors. + CrawlErrorsResponse containing: + * errors (List[Dict[str, str]]): List of errors with fields: + - id (str): Error ID + - timestamp (str): When the error occurred + - url (str): URL that caused the error + - error (str): Error message + * robotsBlocked (List[str]): List of URLs blocked by robots.txt + + Raises: + Exception: If error check fails """ headers = self._prepare_headers() response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers) @@ -684,13 +688,18 @@ class FirecrawlApp: def cancel_crawl(self, id: str) -> Dict[str, Any]: """ - Cancel an asynchronous crawl job using the Firecrawl API. + Cancel an asynchronous crawl job. Args: - id (str): The ID of the crawl job to cancel. + id (str): The ID of the crawl job to cancel Returns: - Dict[str, Any]: The response from the cancel crawl request. + Dict[str, Any] containing: + * success (bool): Whether cancellation was successful + * error (str, optional): Error message if cancellation failed + + Raises: + Exception: If cancellation fails """ headers = self._prepare_headers() response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers) @@ -702,17 +711,42 @@ class FirecrawlApp: else: self._handle_error(response, "cancel crawl job") - def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher': + def crawl_url_and_watch( + self, + url: str, + params: Optional[CrawlParams] = None, + idempotency_key: Optional[str] = None) -> 'CrawlWatcher': """ Initiate a crawl job and return a CrawlWatcher to monitor the job via WebSocket. Args: - url (str): The URL to crawl. - params (Optional[Dict[str, Any]]): Additional parameters for the crawl request. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. 
+ url (str): Target URL to start crawling from + params (Optional[CrawlParams]): See CrawlParams model for configuration: + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - CrawlWatcher: An instance of CrawlWatcher to monitor the crawl job. + AsyncCrawlWatcher: An instance to monitor the crawl job via WebSocket + + Raises: + Exception: If crawl job fails to start """ crawl_response = self.async_crawl_url(url, params, idempotency_key) if crawl_response['success'] and 'id' in crawl_response: @@ -725,27 +759,27 @@ class FirecrawlApp: Map and discover links from a URL. Args: - url: Target URL to map + url: Target URL to map - params: See MapParams model: + params: See MapParams model: - Discovery Options: - * search - Filter pattern for URLs - * ignoreSitemap - Skip sitemap.xml - * includeSubdomains - Include subdomain links - * sitemapOnly - Only use sitemap.xml - - Limits: - * limit - Max URLs to return - * timeout - Request timeout (ms) + Discovery Options: + * search - Filter pattern for URLs + * ignoreSitemap - Skip sitemap.xml + * includeSubdomains - Include subdomain links + * sitemapOnly - Only use sitemap.xml + + Limits: + * limit - Max URLs to return + * timeout - Request timeout (ms) Returns: - MapResponse with: - * Discovered URLs - * Success/error status + MapResponse with: + * Discovered URLs + * Success/error status Raises: - Exception: If mapping fails + Exception: If mapping fails """ endpoint = f'/v1/map' headers = self._prepare_headers() @@ -776,46 +810,40 @@ class FirecrawlApp: self._handle_error(response, 'map') def batch_scrape_urls(self, urls: List[str], - params: Optional[Dict[str, Any]] = None, + params: Optional[ScrapeParams] = None, poll_interval: Optional[int] = 2, idempotency_key: Optional[str] = None) -> BatchScrapeStatusResponse: """ Batch scrape multiple URLs and monitor until completion. 
Args: - urls: URLs to scrape - - params: See ScrapeParams model: - - Content Options: - * formats - Content formats to retrieve - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only + urls (List[str]): URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model: + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type - - Extraction Options: - * extract - Content extraction config - * jsonOptions - JSON extraction config - * actions - Actions to perform - - poll_interval: Seconds between status checks (default: 2) - - idempotency_key: Request deduplication key + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform Returns: - BatchScrapeStatusResponse with: - * Scraping status and progress - * Scraped content for each URL - * Success/error information + BatchScrapeStatusResponse with: + * Scraping status and progress + * Scraped content for each URL + * Success/error information Raises: - Exception: If batch scrape fails + Exception: If batch scrape fails """ endpoint = f'/v1/batch/scrape' headers = self._prepare_headers(idempotency_key) @@ -837,21 +865,41 @@ class FirecrawlApp: def async_batch_scrape_urls( self, urls: List[str], - params: Optional[Dict[str, Any]] = None, + params: Optional[ScrapeParams] = None, idempotency_key: Optional[str] = None) -> BatchScrapeResponse: """ Initiate a batch scrape job asynchronously. Args: - urls (List[str]): The URLs to scrape. - params (Optional[Dict[str, Any]]): Additional parameters for the scraper. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. + urls (List[str]): List of URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - BatchScrapeResponse: A dictionary containing the batch scrape initiation response. The structure includes: - - 'success' (bool): Indicates if the batch scrape initiation was successful. - - 'id' (str): The unique identifier for the batch scrape job. - - 'url' (str): The URL to check the status of the batch scrape job. 
+ BatchScrapeResponse with: + * success - Whether job started successfully + * id - Unique identifier for the job + * url - Status check URL + * error - Error message if start failed + + Raises: + Exception: If job initiation fails """ endpoint = f'/v1/batch/scrape' headers = self._prepare_headers(idempotency_key) @@ -876,12 +924,32 @@ class FirecrawlApp: Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket. Args: - urls (List[str]): The URLs to scrape. - params (Optional[ScrapeParams]): Additional parameters for the scraper. - idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. + urls (List[str]): List of URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: + + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - CrawlWatcher: An instance of CrawlWatcher to monitor the batch scrape job. + AsyncCrawlWatcher: An instance to monitor the batch scrape job via WebSocket + + Raises: + Exception: If batch scrape job fails to start """ crawl_response = self.async_batch_scrape_urls(urls, params, idempotency_key) if crawl_response['success'] and 'id' in crawl_response: @@ -964,16 +1032,16 @@ class FirecrawlApp: Returns information about batch scrape errors. Args: - id (str): The ID of the crawl job. + id (str): The ID of the crawl job. 
Returns: CrawlErrorsResponse: A response containing: - - errors (List[Dict[str, str]]): List of errors with fields: - - id (str): Error ID - - timestamp (str): When the error occurred - - url (str): URL that caused the error - - error (str): Error message - - robotsBlocked (List[str]): List of URLs blocked by robots.txt + * errors (List[Dict[str, str]]): List of errors with fields: + * id (str): Error ID + * timestamp (str): When the error occurred + * url (str): URL that caused the error + * error (str): Error message + * robotsBlocked (List[str]): List of URLs blocked by robots.txt """ headers = self._prepare_headers() response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers) @@ -997,19 +1065,19 @@ class FirecrawlApp: params: See ExtractParams model: - Extraction Config: - * prompt - Custom extraction prompt - * schema - JSON schema/Pydantic model - * systemPrompt - System context - - Behavior Options: - * allowExternalLinks - Follow external links - * enableWebSearch - Enable web search - * includeSubdomains - Include subdomains - * showSources - Include source URLs - - Scraping Options: - * scrapeOptions - Page scraping config + Extraction Config: + * prompt - Custom extraction prompt + * schema - JSON schema/Pydantic model + * systemPrompt - System context + + Behavior Options: + * allowExternalLinks - Follow external links + * enableWebSearch - Enable web search + * includeSubdomains - Include subdomains + * showSources - Include source URLs + + Scraping Options: + * scrapeOptions - Page scraping config Returns: ExtractResponse with: @@ -1120,32 +1188,40 @@ class FirecrawlApp: except Exception as e: raise ValueError(str(e), 500) - def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: + def async_extract( + self, + urls: List[str], + params: Optional[ExtractParams] = None, + idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: """ Initiate an asynchronous extract job. Args: - urls (List[str]): List of URLs to extract information from. Must be valid HTTP/HTTPS URLs. - params (Optional[Dict[str, Any]]): Extraction configuration parameters: - - prompt (str, optional): Custom prompt for extraction - - schema (Any, optional): JSON schema or Pydantic model for structured extraction - - systemPrompt (str, optional): System prompt for extraction - - allowExternalLinks (bool, optional): Allow following external links - - enableWebSearch (bool, optional): Enable web search during extraction - - includeSubdomains (bool, optional): Include content from subdomains - - origin (str, optional): Source of the extraction request - - showSources (bool, optional): Include source URLs in response - - scrapeOptions (CrawlScrapeOptions, optional): Configuration for scraping pages - idempotency_key (Optional[str]): Unique identifier to prevent duplicate requests. 
+ urls (List[str]): URLs to extract information from + params (Optional[ExtractParams]): See ExtractParams model: + Extraction Config: + * prompt - Custom extraction prompt + * schema - JSON schema/Pydantic model + * systemPrompt - System context + + Behavior Options: + * allowExternalLinks - Follow external links + * enableWebSearch - Enable web search + * includeSubdomains - Include subdomains + * showSources - Include source URLs + + Scraping Options: + * scrapeOptions - Page scraping config + idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - ExtractResponse[Any]: A response containing: - - success (bool): Whether the extraction initiation was successful - - id (str): The unique identifier for the extract job - - error (str, optional): Error message if initiation failed + ExtractResponse containing: + * success (bool): Whether job started successfully + * id (str): Unique identifier for the job + * error (str, optional): Error message if start failed Raises: - ValueError: If neither prompt nor schema is provided, or if there is an error during initiation. + ValueError: If job initiation fails """ headers = self._prepare_headers(idempotency_key) @@ -1184,24 +1260,26 @@ class FirecrawlApp: Generate LLMs.txt for a given URL and poll until completion. Args: - url: Target URL to generate LLMs.txt from + url: Target URL to generate LLMs.txt from params: See GenerateLLMsTextParams model: + params: See GenerateLLMsTextParams model: - Generation Options: - * maxUrls - Maximum URLs to process (default: 10) - * showFullText - Include full text in output (default: False) - * __experimental_stream - Enable streaming of generation progress + params: See GenerateLLMsTextParams model: + + Generation Options: + * maxUrls - Maximum URLs to process (default: 10) + * showFullText - Include full text in output (default: False) Returns: - GenerateLLMsTextStatusResponse with: - * Generated LLMs.txt content - * Full version if requested - * Generation status - * Success/error information + GenerateLLMsTextStatusResponse with: + * Generated LLMs.txt content + * Full version if requested + * Generation status + * Success/error information Raises: - Exception: If generation fails + Exception: If generation fails """ if params is None: params = {} @@ -1238,20 +1316,19 @@ class FirecrawlApp: Initiate an asynchronous LLMs.txt generation operation. Args: - url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL. - params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Generation configuration parameters: - - maxUrls (int, optional): Maximum number of URLs to process (default: 10) - - showFullText (bool, optional): Include full text in output (default: False) - - __experimental_stream (bool, optional): Enable streaming of generation progress + url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL. 
+ params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Generation configuration parameters: + * maxUrls (int, optional): Maximum number of URLs to process (default: 10) + * showFullText (bool, optional): Include full text in output (default: False) Returns: - GenerateLLMsTextResponse: A response containing: - - success (bool): Whether the generation initiation was successful - - id (str): The unique identifier for the generation job - - error (str, optional): Error message if initiation failed + GenerateLLMsTextResponse: A response containing: + - success (bool): Whether the generation initiation was successful + - id (str): The unique identifier for the generation job + - error (str, optional): Error message if initiation failed Raises: - Exception: If the generation job initiation fails. + Exception: If the generation job initiation fails. """ if params is None: params = {} @@ -1283,20 +1360,20 @@ class FirecrawlApp: Check the status of a LLMs.txt generation operation. Args: - id (str): The unique identifier of the LLMs.txt generation job to check status for. + id (str): The unique identifier of the LLMs.txt generation job to check status for. Returns: - GenerateLLMsTextStatusResponse: A response containing: - - success (bool): Whether the generation was successful - - status (str): Status of generation ("processing", "completed", "failed") - - data (Dict[str, str], optional): Generated text with fields: - - llmstxt (str): Generated LLMs.txt content - - llmsfulltxt (str, optional): Full version if requested - - error (str, optional): Error message if generation failed - - expiresAt (str): When the generated data expires + GenerateLLMsTextStatusResponse: A response containing: + * success (bool): Whether the generation was successful + * status (str): Status of generation ("processing", "completed", "failed") + * data (Dict[str, str], optional): Generated text with fields: + * llmstxt (str): Generated LLMs.txt content + * llmsfulltxt (str, optional): Full version if requested + * error (str, optional): Error message if generation failed + * expiresAt (str): When the generated data expires Raises: - Exception: If the status check fails. + Exception: If the status check fails. """ headers = self._prepare_headers() try: @@ -1525,37 +1602,37 @@ class FirecrawlApp: Initiates a deep research operation on a given query and polls until completion. 
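A minimal sketch of the blocking LLMs.txt generation flow documented above (generate_llms_text starts the job and polls internally), assuming a placeholder API key and dict-style access to the returned status fields:

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

# Blocks until the generation job completes or fails.
result = app.generate_llms_text("https://firecrawl.dev", {"maxUrls": 5, "showFullText": True})
if result.get("status") == "completed":
    print(result["data"]["llmstxt"])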
Args: - query: Research query or topic to investigate + query: Research query or topic to investigate - params: See DeepResearchParams model: - Research Settings: - * maxDepth - Maximum research depth (default: 7) - * timeLimit - Time limit in seconds (default: 270) - * maxUrls - Maximum URLs to process (default: 20) + params: See DeepResearchParams model: + Research Settings: + * maxDepth - Maximum research depth (default: 7) + * timeLimit - Time limit in seconds (default: 270) + * maxUrls - Maximum URLs to process (default: 20) - Callbacks: - * on_activity - Progress callback receiving: - {type, status, message, timestamp, depth} - * on_source - Source discovery callback receiving: - {url, title, description} + Callbacks: + * on_activity - Progress callback receiving: + {type, status, message, timestamp, depth} + * on_source - Source discovery callback receiving: + {url, title, description} Returns: - DeepResearchResponse containing: + DeepResearchResponse containing: - Status: - * success - Whether research completed successfully - * status - Current state (processing/completed/failed) - * error - Error message if failed - - Results: - * id - Unique identifier for the research job - * data - Research findings and analysis - * sources - List of discovered sources - * activities - Research progress log - * summaries - Generated research summaries + Status: + * success - Whether research completed successfully + * status - Current state (processing/completed/failed) + * error - Error message if failed + + Results: + * id - Unique identifier for the research job + * data - Research findings and analysis + * sources - List of discovered sources + * activities - Research progress log + * summaries - Generated research summaries Raises: - Exception: If research fails + Exception: If research fails """ if params is None: params = {} @@ -1609,16 +1686,15 @@ class FirecrawlApp: Args: query (str): The research query to investigate. Should be a clear, specific question or topic. params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Research configuration parameters: - - maxDepth (int, optional): Maximum depth of research exploration (default: 7) - - timeLimit (int, optional): Time limit in seconds for research (default: 270) - - maxUrls (int, optional): Maximum number of URLs to process (default: 20) - - __experimental_streamSteps (bool, optional): Enable streaming of research steps + * maxDepth (int, optional): Maximum depth of research exploration (default: 7) + * timeLimit (int, optional): Time limit in seconds for research (default: 270) + * maxUrls (int, optional): Maximum number of URLs to process (default: 20) Returns: - DeepResearchResponse: A response containing: - - success (bool): Whether the research initiation was successful - - id (str): The unique identifier for the research job - - error (str, optional): Error message if initiation failed + DeepResearchResponse: A response containing: + * success (bool): Whether the research initiation was successful + * id (str): The unique identifier for the research job + * error (str, optional): Error message if initiation failed Raises: Exception: If the research initiation fails. @@ -1689,6 +1765,7 @@ class FirecrawlApp: raise ValueError(str(e)) return {'success': False, 'error': 'Internal server error'} + class CrawlWatcher: """ A class to watch and handle crawl job events via WebSocket connection. 
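A minimal sketch of the deep_research call with the progress callbacks described in the docstring above, assuming a placeholder API key; the callback payload shapes follow the docstring ({type, status, message, timestamp, depth} and {url, title, description}):

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

def on_activity(activity):
    # Progress events emitted while the research job runs.
    print(f"[{activity['type']}] {activity['message']}")

def on_source(source):
    # Fired as new sources are discovered.
    print(f"source: {source['url']}")

results = app.deep_research(
    "What are the latest developments in open-source LLMs?",
    params={"maxDepth": 3, "timeLimit": 120, "maxUrls": 10},
    on_activity=on_activity,
    on_source=on_source,
)
if results.get("success"):
    print(results["data"])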
@@ -1775,3 +1852,1336 @@ class CrawlWatcher: elif msg['type'] == 'document': self.data.append(msg['data']) self.dispatch_event('document', {'data': msg['data'], 'id': self.id}) + +class AsyncFirecrawlApp(FirecrawlApp): + """ + Asynchronous version of FirecrawlApp that implements async methods using aiohttp. + Provides non-blocking alternatives to all FirecrawlApp operations. + """ + + async def _async_post_request( + self, + url: str, + data: Dict[str, Any], + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> Dict[str, Any]: + """ + Make an async POST request with exponential backoff retry logic. + + Args: + url (str): The URL to send the POST request to + data (Dict[str, Any]): The JSON data to include in the request body + headers (Dict[str, str]): Headers to include in the request + retries (int): Maximum number of retry attempts (default: 3) + backoff_factor (float): Factor to calculate delay between retries (default: 0.5) + Delay will be backoff_factor * (2 ** retry_count) + + Returns: + Dict[str, Any]: The parsed JSON response from the server + + Raises: + aiohttp.ClientError: If the request fails after all retries + Exception: If max retries are exceeded or other errors occur + """ + async with aiohttp.ClientSession() as session: + for attempt in range(retries): + try: + async with session.post(url, headers=headers, json=data) as response: + if response.status == 502: + await asyncio.sleep(backoff_factor * (2 ** attempt)) + continue + if response.status != 200: + await self._handle_error(response, "make POST request") + return await response.json() + except aiohttp.ClientError as e: + if attempt == retries - 1: + raise e + await asyncio.sleep(backoff_factor * (2 ** attempt)) + raise Exception("Max retries exceeded") + + async def _async_get_request( + self, + url: str, + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> Dict[str, Any]: + """ + Make an async GET request with exponential backoff retry logic. + + Args: + url (str): The URL to send the GET request to + headers (Dict[str, str]): Headers to include in the request + retries (int): Maximum number of retry attempts (default: 3) + backoff_factor (float): Factor to calculate delay between retries (default: 0.5) + Delay will be backoff_factor * (2 ** retry_count) + + Returns: + Dict[str, Any]: The parsed JSON response from the server + + Raises: + aiohttp.ClientError: If the request fails after all retries + Exception: If max retries are exceeded or other errors occur + """ + async with aiohttp.ClientSession() as session: + for attempt in range(retries): + try: + async with session.get(url, headers=headers) as response: + if response.status == 502: + await asyncio.sleep(backoff_factor * (2 ** attempt)) + continue + if response.status != 200: + await self._handle_error(response, "make GET request") + return await response.json() + except aiohttp.ClientError as e: + if attempt == retries - 1: + raise e + await asyncio.sleep(backoff_factor * (2 ** attempt)) + raise Exception("Max retries exceeded") + + async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None: + """ + Handle errors from async API responses with detailed error messages. 
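To make the retry behaviour of the async request helpers above concrete: each retry sleeps backoff_factor * (2 ** attempt) seconds, so the defaults (retries=3, backoff_factor=0.5) give the schedule below.

# Delays applied after a 502 or transient client error, per the helpers above.
for attempt in range(3):
    delay = 0.5 * (2 ** attempt)  # 0.5s, 1.0s, 2.0s
    print(f"retry {attempt + 1} after {delay}s")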
+ + Args: + response (aiohttp.ClientResponse): The response object from the failed request + action (str): Description of the action that was being attempted + + Raises: + aiohttp.ClientError: With a detailed error message based on the response status: + - 402: Payment Required + - 408: Request Timeout + - 409: Conflict + - 500: Internal Server Error + - Other: Unexpected error with status code + """ + try: + error_data = await response.json() + error_message = error_data.get('error', 'No error message provided.') + error_details = error_data.get('details', 'No additional error details provided.') + except: + raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}') + + if response.status == 402: + message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" + elif response.status == 408: + message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" + elif response.status == 409: + message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" + elif response.status == 500: + message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" + else: + message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}" + + raise aiohttp.ClientError(message) + + async def crawl_url_and_watch( + self, + url: str, + params: Optional[CrawlParams] = None, + idempotency_key: Optional[str] = None) -> 'AsyncCrawlWatcher': + """ + Initiate an async crawl job and return an AsyncCrawlWatcher to monitor progress via WebSocket. + + Args: + url (str): Target URL to start crawling from + params (Optional[CrawlParams]): See CrawlParams model for configuration: + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + AsyncCrawlWatcher: An instance to monitor the crawl job via WebSocket + + Raises: + Exception: If crawl job fails to start + """ + crawl_response = await self.async_crawl_url(url, params, idempotency_key) + if crawl_response.get('success') and 'id' in crawl_response: + return AsyncCrawlWatcher(crawl_response['id'], self) + else: + raise Exception("Crawl job failed to start") + + async def batch_scrape_urls_and_watch( + self, + urls: List[str], + params: Optional[ScrapeParams] = None, + idempotency_key: Optional[str] = None) -> 'AsyncCrawlWatcher': + """ + Initiate an async batch scrape job and return an AsyncCrawlWatcher to monitor progress. 
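A minimal sketch of the async WebSocket-watching flow documented above, assuming a placeholder API key; crawl_url_and_watch returns an AsyncCrawlWatcher whose connect() coroutine streams events until the job finishes:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def watch_crawl():
    # Start the crawl, then listen for progress over WebSocket until done.
    watcher = await app.crawl_url_and_watch("https://firecrawl.dev", {"limit": 5})
    await watcher.connect()

asyncio.run(watch_crawl())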
+ + Args: + urls (List[str]): List of URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: + + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + AsyncCrawlWatcher: An instance to monitor the batch scrape job via WebSocket + + Raises: + Exception: If batch scrape job fails to start + """ + batch_response = await self.async_batch_scrape_urls(urls, params, idempotency_key) + if batch_response.get('success') and 'id' in batch_response: + return AsyncCrawlWatcher(batch_response['id'], self) + else: + raise Exception("Batch scrape job failed to start") + + async def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> ScrapeResponse[Any]: + """ + Asynchronously scrape and extract content from a URL. + + Args: + url (str): Target URL to scrape + params (Optional[Dict[str, Any]]): See ScrapeParams model for configuration: + Content Options: + * formats - Content types to retrieve (markdown/html/etc) + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type (basic/stealth) + + Extraction Options: + * extract - Content extraction settings + * jsonOptions - JSON extraction settings + * actions - Actions to perform + + Returns: + ScrapeResponse with: + * Requested content formats + * Page metadata + * Extraction results + * Success/error status + + Raises: + Exception: If scraping fails + """ + headers = self._prepare_headers() + scrape_params = {'url': url} + + if params: + extract = params.get('extract', {}) + if extract: + if 'schema' in extract and hasattr(extract['schema'], 'schema'): + extract['schema'] = extract['schema'].schema() + scrape_params['extract'] = extract + + for key, value in params.items(): + if key not in ['extract']: + scrape_params[key] = value + + endpoint = f'/v1/scrape' + response = await self._async_post_request( + f'{self.api_url}{endpoint}', + scrape_params, + headers + ) + + if response.get('success') and 'data' in response: + return response['data'] + elif "error" in response: + raise Exception(f'Failed to scrape URL. Error: {response["error"]}') + else: + raise Exception(f'Failed to scrape URL. Error: {response}') + + async def batch_scrape_urls(self, urls: List[str], params: Optional[ScrapeParams] = None) -> BatchScrapeStatusResponse: + """ + Asynchronously scrape multiple URLs and monitor until completion. 
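A minimal sketch of the async scrape_url call documented above, assuming a placeholder API key; the returned document is accessed as a dict, as in the SDK's own example script:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def scrape_one():
    # Request markdown and links for a single page.
    data = await app.scrape_url("https://firecrawl.dev", {"formats": ["markdown", "links"]})
    print(data.get("markdown", "")[:200])

asyncio.run(scrape_one())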
+ + Args: + urls (List[str]): URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model: + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + + Returns: + BatchScrapeStatusResponse with: + * Scraping status and progress + * Scraped content for each URL + * Success/error information + + Raises: + Exception: If batch scrape fails + """ + headers = self._prepare_headers() + json_data = {'urls': urls} + if params: + json_data.update(params) + + endpoint = f'/v1/batch/scrape' + response = await self._async_post_request( + f'{self.api_url}{endpoint}', + json_data, + headers + ) + + if response.get('success') and 'id' in response: + return await self._async_monitor_job_status(response['id'], headers) + else: + raise Exception(f'Failed to start batch scrape. Error: {response.get("error")}') + + async def async_batch_scrape_urls( + self, + urls: List[str], + params: Optional[ScrapeParams] = None, + idempotency_key: Optional[str] = None) -> BatchScrapeResponse: + """ + Initiate an asynchronous batch scrape job without waiting for completion. + + Args: + urls (List[str]): List of URLs to scrape + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: + Content Options: + * formats - Content formats to retrieve + * includeTags - HTML tags to include + * excludeTags - HTML tags to exclude + * onlyMainContent - Extract main content only + + Request Options: + * headers - Custom HTTP headers + * timeout - Request timeout (ms) + * mobile - Use mobile user agent + * proxy - Proxy type + + Extraction Options: + * extract - Content extraction config + * jsonOptions - JSON extraction config + * actions - Actions to perform + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + BatchScrapeResponse with: + * success - Whether job started successfully + * id - Unique identifier for the job + * url - Status check URL + * error - Error message if start failed + + Raises: + Exception: If job initiation fails + """ + headers = self._prepare_headers(idempotency_key) + json_data = {'urls': urls} + if params: + json_data.update(params) + + endpoint = f'/v1/batch/scrape' + return await self._async_post_request( + f'{self.api_url}{endpoint}', + json_data, + headers + ) + + async def crawl_url( + self, + url: str, + params: Optional[CrawlParams] = None, + poll_interval: int = 2, + idempotency_key: Optional[str] = None) -> CrawlStatusResponse: + """ + Asynchronously crawl a website starting from a URL and monitor until completion. 
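A minimal sketch of the monitored async batch scrape documented above, assuming a placeholder API key and example URLs; batch_scrape_urls only returns once every URL has been processed:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def batch():
    # Waits (asynchronously) until all URLs are scraped, then inspects the results.
    status = await app.batch_scrape_urls(
        ["https://firecrawl.dev", "https://docs.firecrawl.dev"],
        {"formats": ["markdown"]},
    )
    for doc in status.get("data", []):
        print(doc.get("metadata", {}).get("sourceURL"))

asyncio.run(batch())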
+ + Args: + url (str): Target URL to start crawling from + params (Optional[CrawlParams]): See CrawlParams model: + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + poll_interval (int): Seconds between status checks (default: 2) + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + CrawlStatusResponse with: + * Crawling status and progress + * Crawled page contents + * Success/error information + + Raises: + Exception: If crawl fails + """ + headers = self._prepare_headers(idempotency_key) + json_data = {'url': url} + if params: + json_data.update(params) + + endpoint = f'/v1/crawl' + response = await self._async_post_request( + f'{self.api_url}{endpoint}', + json_data, + headers + ) + + if response.get('success') and 'id' in response: + return await self._async_monitor_job_status(response['id'], headers, poll_interval) + else: + raise Exception(f'Failed to start crawl. Error: {response.get("error")}') + + async def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> CrawlResponse: + """ + Initiate an asynchronous crawl job without waiting for completion. + + Args: + url (str): Target URL to start crawling from + params (Optional[Dict[str, Any]]): See CrawlParams model: + URL Discovery: + * includePaths - Patterns of URLs to include + * excludePaths - Patterns of URLs to exclude + * maxDepth - Maximum crawl depth + * maxDiscoveryDepth - Maximum depth for finding new URLs + * limit - Maximum pages to crawl + + Link Following: + * allowBackwardLinks - Follow parent directory links + * allowExternalLinks - Follow external domain links + * ignoreSitemap - Skip sitemap.xml processing + + Advanced: + * scrapeOptions - Page scraping configuration + * webhook - Notification webhook settings + * deduplicateSimilarURLs - Remove similar URLs + * ignoreQueryParameters - Ignore URL parameters + * regexOnFullURL - Apply regex to full URLs + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + CrawlResponse with: + * success - Whether job started successfully + * id - Unique identifier for the job + * url - Status check URL + * error - Error message if start failed + + Raises: + Exception: If job initiation fails + """ + headers = self._prepare_headers(idempotency_key) + json_data = {'url': url} + if params: + json_data.update(params) + + endpoint = f'/v1/crawl' + return await self._async_post_request( + f'{self.api_url}{endpoint}', + json_data, + headers + ) + + async def check_crawl_status(self, id: str) -> CrawlStatusResponse: + """ + Check the status and results of an asynchronous crawl job. 
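A minimal sketch of the fire-and-forget crawl pattern documented above, assuming a placeholder API key: async_crawl_url starts the job, and check_crawl_status is polled on the caller's own schedule:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def fire_and_forget_crawl():
    # Start the job without waiting for completion.
    job = await app.async_crawl_url("https://firecrawl.dev", {"limit": 10, "maxDepth": 2})
    while True:
        status = await app.check_crawl_status(job["id"])
        if status["status"] in ("completed", "failed", "cancelled"):
            break
        await asyncio.sleep(5)
    print(status["status"], status.get("completed"), "/", status.get("total"))

asyncio.run(fire_and_forget_crawl())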
+ + Args: + id (str): Unique identifier for the crawl job + + Returns: + CrawlStatusResponse containing: + Status Information: + * status - Current state (scraping/completed/failed/cancelled) + * completed - Number of pages crawled + * total - Total pages to crawl + * creditsUsed - API credits consumed + * expiresAt - Data expiration timestamp + + Results: + * data - List of crawled documents + * next - URL for next page of results (if paginated) + * success - Whether status check succeeded + * error - Error message if failed + + Raises: + Exception: If status check fails + """ + headers = self._prepare_headers() + endpoint = f'/v1/crawl/{id}' + + status_data = await self._async_get_request( + f'{self.api_url}{endpoint}', + headers + ) + + if status_data['status'] == 'completed': + if 'data' in status_data: + data = status_data['data'] + while 'next' in status_data: + if len(status_data['data']) == 0: + break + next_url = status_data.get('next') + if not next_url: + logger.warning("Expected 'next' URL is missing.") + break + next_data = await self._async_get_request(next_url, headers) + data.extend(next_data.get('data', [])) + status_data = next_data + status_data['data'] = data + + response = { + 'status': status_data.get('status'), + 'total': status_data.get('total'), + 'completed': status_data.get('completed'), + 'creditsUsed': status_data.get('creditsUsed'), + 'expiresAt': status_data.get('expiresAt'), + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response + } + + async def _async_monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int = 2) -> CrawlStatusResponse: + """ + Monitor the status of an asynchronous job until completion. + + Args: + id (str): The ID of the job to monitor + headers (Dict[str, str]): Headers to include in status check requests + poll_interval (int): Seconds between status checks (default: 2) + + Returns: + CrawlStatusResponse: The job results if completed successfully + + Raises: + Exception: If the job fails or an error occurs during status checks + """ + while True: + status_data = await self._async_get_request( + f'{self.api_url}/v1/crawl/{id}', + headers + ) + + if status_data['status'] == 'completed': + if 'data' in status_data: + data = status_data['data'] + while 'next' in status_data: + if len(status_data['data']) == 0: + break + next_url = status_data.get('next') + if not next_url: + logger.warning("Expected 'next' URL is missing.") + break + next_data = await self._async_get_request(next_url, headers) + data.extend(next_data.get('data', [])) + status_data = next_data + status_data['data'] = data + return status_data + else: + raise Exception('Job completed but no data was returned') + elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']: + await asyncio.sleep(max(poll_interval, 2)) + else: + raise Exception(f'Job failed or was stopped. Status: {status_data["status"]}') + + async def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> MapResponse: + """ + Asynchronously map and discover links from a URL. 
+ + Args: + url (str): Target URL to map + params (Optional[Dict[str, Any]]): See MapParams model: + Discovery Options: + * search - Filter pattern for URLs + * ignoreSitemap - Skip sitemap.xml + * includeSubdomains - Include subdomain links + * sitemapOnly - Only use sitemap.xml + + Limits: + * limit - Max URLs to return + * timeout - Request timeout (ms) + + Returns: + MapResponse with: + * Discovered URLs + * Success/error status + + Raises: + Exception: If mapping fails + """ + headers = self._prepare_headers() + json_data = {'url': url} + if params: + json_data.update(params) + + endpoint = f'/v1/map' + response = await self._async_post_request( + f'{self.api_url}{endpoint}', + json_data, + headers + ) + + if response.get('success') and 'links' in response: + return response + elif 'error' in response: + raise Exception(f'Failed to map URL. Error: {response["error"]}') + else: + raise Exception(f'Failed to map URL. Error: {response}') + + async def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> ExtractResponse[Any]: + """ + Asynchronously extract structured information from URLs. + + Args: + urls (List[str]): URLs to extract from + params (Optional[ExtractParams]): See ExtractParams model: + Extraction Config: + * prompt - Custom extraction prompt + * schema - JSON schema/Pydantic model + * systemPrompt - System context + + Behavior Options: + * allowExternalLinks - Follow external links + * enableWebSearch - Enable web search + * includeSubdomains - Include subdomains + * showSources - Include source URLs + + Scraping Options: + * scrapeOptions - Page scraping config + + Returns: + ExtractResponse with: + * Structured data matching schema + * Source information if requested + * Success/error status + + Raises: + ValueError: If prompt/schema missing or extraction fails + """ + headers = self._prepare_headers() + + if not params or (not params.get('prompt') and not params.get('schema')): + raise ValueError("Either prompt or schema is required") + + schema = params.get('schema') + if schema: + if hasattr(schema, 'model_json_schema'): + schema = schema.model_json_schema() + + request_data = { + 'urls': urls, + 'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)), + 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), + 'showSources': params.get('show_sources', params.get('showSources', False)), + 'schema': schema, + 'origin': 'api-sdk' + } + + if params.get('prompt'): + request_data['prompt'] = params['prompt'] + if params.get('system_prompt'): + request_data['systemPrompt'] = params['system_prompt'] + elif params.get('systemPrompt'): + request_data['systemPrompt'] = params['systemPrompt'] + + response = await self._async_post_request( + f'{self.api_url}/v1/extract', + request_data, + headers + ) + + if response.get('success'): + job_id = response.get('id') + if not job_id: + raise Exception('Job ID not returned from extract request.') + + while True: + status_data = await self._async_get_request( + f'{self.api_url}/v1/extract/{job_id}', + headers + ) + + if status_data['status'] == 'completed': + return status_data + elif status_data['status'] in ['failed', 'cancelled']: + raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}') + + await asyncio.sleep(2) + else: + raise Exception(f'Failed to extract. 
Error: {response.get("error")}') + + async def check_batch_scrape_status(self, id: str) -> BatchScrapeStatusResponse: + """ + Check the status of an asynchronous batch scrape job. + + Args: + id (str): The ID of the batch scrape job + + Returns: + BatchScrapeStatusResponse containing: + Status Information: + * status - Current state (scraping/completed/failed/cancelled) + * completed - Number of URLs scraped + * total - Total URLs to scrape + * creditsUsed - API credits consumed + * expiresAt - Data expiration timestamp + + Results: + * data - List of scraped documents + * next - URL for next page of results (if paginated) + * success - Whether status check succeeded + * error - Error message if failed + + Raises: + Exception: If status check fails + """ + headers = self._prepare_headers() + endpoint = f'/v1/batch/scrape/{id}' + + status_data = await self._async_get_request( + f'{self.api_url}{endpoint}', + headers + ) + + if status_data['status'] == 'completed': + if 'data' in status_data: + data = status_data['data'] + while 'next' in status_data: + if len(status_data['data']) == 0: + break + next_url = status_data.get('next') + if not next_url: + logger.warning("Expected 'next' URL is missing.") + break + next_data = await self._async_get_request(next_url, headers) + data.extend(next_data.get('data', [])) + status_data = next_data + status_data['data'] = data + + response = { + 'status': status_data.get('status'), + 'total': status_data.get('total'), + 'completed': status_data.get('completed'), + 'creditsUsed': status_data.get('creditsUsed'), + 'expiresAt': status_data.get('expiresAt'), + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response + } + + async def check_batch_scrape_errors(self, id: str) -> CrawlErrorsResponse: + """ + Get information about errors from an asynchronous batch scrape job. + + Args: + id (str): The ID of the batch scrape job + + Returns: + CrawlErrorsResponse containing: + errors (List[Dict[str, str]]): List of errors with fields: + * id (str): Error ID + * timestamp (str): When the error occurred + * url (str): URL that caused the error + * error (str): Error message + * robotsBlocked (List[str]): List of URLs blocked by robots.txt + + Raises: + Exception: If error check fails + """ + headers = self._prepare_headers() + return await self._async_get_request( + f'{self.api_url}/v1/batch/scrape/{id}/errors', + headers + ) + + async def check_crawl_errors(self, id: str) -> CrawlErrorsResponse: + """ + Get information about errors from an asynchronous crawl job. + + Args: + id (str): The ID of the crawl job + + Returns: + CrawlErrorsResponse containing: + * errors (List[Dict[str, str]]): List of errors with fields: + - id (str): Error ID + - timestamp (str): When the error occurred + - url (str): URL that caused the error + - error (str): Error message + * robotsBlocked (List[str]): List of URLs blocked by robots.txt + + Raises: + Exception: If error check fails + """ + headers = self._prepare_headers() + return await self._async_get_request( + f'{self.api_url}/v1/crawl/{id}/errors', + headers + ) + + async def cancel_crawl(self, id: str) -> Dict[str, Any]: + """ + Cancel an asynchronous crawl job. 
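A minimal sketch of cancelling a crawl and inspecting its errors with the async methods documented above, assuming a placeholder API key and a crawl ID obtained earlier from async_crawl_url:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def stop_and_inspect(job_id: str):
    cancelled = await app.cancel_crawl(job_id)
    print("cancelled:", cancelled.get("success"))

    errors = await app.check_crawl_errors(job_id)
    for err in errors.get("errors", []):
        print(err["url"], "->", err["error"])
    print("blocked by robots.txt:", errors.get("robotsBlocked", []))

asyncio.run(stop_and_inspect("YOUR-CRAWL-ID"))  # placeholder job ID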
+ + Args: + id (str): The ID of the crawl job to cancel + + Returns: + Dict[str, Any] containing: + * success (bool): Whether cancellation was successful + * error (str, optional): Error message if cancellation failed + + Raises: + Exception: If cancellation fails + """ + headers = self._prepare_headers() + async with aiohttp.ClientSession() as session: + async with session.delete(f'{self.api_url}/v1/crawl/{id}', headers=headers) as response: + return await response.json() + + async def get_extract_status(self, job_id: str) -> ExtractResponse[Any]: + """ + Check the status of an asynchronous extraction job. + + Args: + job_id (str): The ID of the extraction job + + Returns: + ExtractResponse containing: + * success (bool): Whether extraction completed successfully + * data (Any): Extracted structured data + * error (str, optional): Error message if extraction failed + * warning (str, optional): Warning message if any + * sources (List[str], optional): Source URLs if requested + + Raises: + ValueError: If status check fails + """ + headers = self._prepare_headers() + try: + return await self._async_get_request( + f'{self.api_url}/v1/extract/{job_id}', + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def async_extract( + self, + urls: List[str], + params: Optional[ExtractParams] = None, + idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: + """ + Initiate an asynchronous extraction job without waiting for completion. + + Args: + urls (List[str]): URLs to extract information from + params (Optional[ExtractParams]): See ExtractParams model: + Extraction Config: + * prompt - Custom extraction prompt + * schema - JSON schema/Pydantic model + * systemPrompt - System context + + Behavior Options: + * allowExternalLinks - Follow external links + * enableWebSearch - Enable web search + * includeSubdomains - Include subdomains + * showSources - Include source URLs + + Scraping Options: + * scrapeOptions - Page scraping config + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + + Returns: + ExtractResponse containing: + * success (bool): Whether job started successfully + * id (str): Unique identifier for the job + * error (str, optional): Error message if start failed + + Raises: + ValueError: If job initiation fails + """ + headers = self._prepare_headers(idempotency_key) + + schema = params.get('schema') if params else None + if schema: + if hasattr(schema, 'model_json_schema'): + schema = schema.model_json_schema() + + jsonData = {'urls': urls, **(params or {})} + request_data = { + **jsonData, + 'allowExternalLinks': params.get('allow_external_links', False) if params else False, + 'schema': schema, + 'origin': 'api-sdk' + } + + try: + return await self._async_post_request( + f'{self.api_url}/v1/extract', + request_data, + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: + """ + Generate LLMs.txt for a given URL and monitor until completion. 
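A minimal sketch of the non-blocking extract flow documented above, assuming a placeholder API key; async_extract starts the job (a prompt or schema is required) and get_extract_status checks on it later:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def extract_in_background():
    # A plain prompt is used here; a JSON schema or Pydantic model also works.
    job = await app.async_extract(
        ["https://firecrawl.dev"],
        {"prompt": "Summarize what this product does in one sentence."},
    )
    status = await app.get_extract_status(job["id"])
    print(status)

asyncio.run(extract_in_background())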
+ + Args: + url (str): Target URL to generate LLMs.txt from + params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model: + Generation Options: + * maxUrls - Maximum URLs to process (default: 10) + * showFullText - Include full text in output (default: False) + + Returns: + GenerateLLMsTextStatusResponse containing: + * success (bool): Whether generation completed successfully + * status (str): Status of generation (processing/completed/failed) + * data (Dict[str, str], optional): Generated text with fields: + - llmstxt (str): Generated LLMs.txt content + - llmsfulltxt (str, optional): Full version if requested + * error (str, optional): Error message if generation failed + * expiresAt (str): When the generated data expires + + Raises: + Exception: If generation fails + """ + if params is None: + params = {} + + if isinstance(params, dict): + generation_params = GenerateLLMsTextParams(**params) + else: + generation_params = params + + response = await self.async_generate_llms_text(url, generation_params) + if not response.get('success') or 'id' not in response: + return response + + job_id = response['id'] + while True: + status = await self.check_generate_llms_text_status(job_id) + + if status['status'] == 'completed': + return status + elif status['status'] == 'failed': + raise Exception(f'LLMs.txt generation failed. Error: {status.get("error")}') + elif status['status'] != 'processing': + break + + await asyncio.sleep(2) + + return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'} + + async def async_generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse: + """ + Initiate an asynchronous LLMs.txt generation job without waiting for completion. + + Args: + url (str): Target URL to generate LLMs.txt from + params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model: + Generation Options: + * maxUrls - Maximum URLs to process (default: 10) + * showFullText - Include full text in output (default: False) + + Returns: + GenerateLLMsTextResponse containing: + * success (bool): Whether job started successfully + * id (str): Unique identifier for the job + * error (str, optional): Error message if start failed + + Raises: + ValueError: If job initiation fails + """ + if params is None: + params = {} + + if isinstance(params, dict): + generation_params = GenerateLLMsTextParams(**params) + else: + generation_params = params + + headers = self._prepare_headers() + json_data = {'url': url, **generation_params.dict(exclude_none=True)} + + try: + return await self._async_post_request( + f'{self.api_url}/v1/llmstxt', + json_data, + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def check_generate_llms_text_status(self, id: str) -> GenerateLLMsTextStatusResponse: + """ + Check the status of an asynchronous LLMs.txt generation job. 
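A minimal sketch of the async counterpart of LLMs.txt generation documented above, assuming a placeholder API key; generate_llms_text polls internally until the job finishes:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def build_llms_txt():
    result = await app.generate_llms_text("https://firecrawl.dev", {"maxUrls": 5})
    if result.get("status") == "completed":
        print(result["data"]["llmstxt"])

asyncio.run(build_llms_txt())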
+ + Args: + id (str): The ID of the generation job + + Returns: + GenerateLLMsTextStatusResponse containing: + * success (bool): Whether generation completed successfully + * status (str): Status of generation (processing/completed/failed) + * data (Dict[str, str], optional): Generated text with fields: + - llmstxt (str): Generated LLMs.txt content + - llmsfulltxt (str, optional): Full version if requested + * error (str, optional): Error message if generation failed + * expiresAt (str): When the generated data expires + + Raises: + ValueError: If status check fails + """ + headers = self._prepare_headers() + try: + return await self._async_get_request( + f'{self.api_url}/v1/llmstxt/{id}', + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def deep_research( + self, + query: str, + params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None, + on_activity: Optional[Callable[[Dict[str, Any]], None]] = None, + on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse: + """ + Initiates a deep research operation on a given query and polls until completion, providing real-time updates via callbacks. + + Args: + query: Research query or topic to investigate + + params: See DeepResearchParams model: + Research Settings: + * maxDepth - Maximum research depth (default: 7) + * timeLimit - Time limit in seconds (default: 270) + * maxUrls - Maximum URLs to process (default: 20) + + Callbacks: + * on_activity - Progress callback receiving: + {type, status, message, timestamp, depth} + * on_source - Source discovery callback receiving: + {url, title, description} + + Returns: + DeepResearchResponse containing: + + Status: + * success - Whether research completed successfully + * status - Current state (processing/completed/failed) + * error - Error message if failed + + Results: + * id - Unique identifier for the research job + * data - Research findings and analysis + * sources - List of discovered sources + * activities - Research progress log + * summaries - Generated research summaries + + Raises: + Exception: If research fails + """ + if params is None: + params = {} + + if isinstance(params, dict): + research_params = DeepResearchParams(**params) + else: + research_params = params + + response = await self.async_deep_research(query, research_params) + if not response.get('success') or 'id' not in response: + return response + + job_id = response['id'] + last_activity_count = 0 + last_source_count = 0 + + while True: + status = await self.check_deep_research_status(job_id) + + if on_activity and 'activities' in status: + new_activities = status['activities'][last_activity_count:] + for activity in new_activities: + on_activity(activity) + last_activity_count = len(status['activities']) + + if on_source and 'sources' in status: + new_sources = status['sources'][last_source_count:] + for source in new_sources: + on_source(source) + last_source_count = len(status['sources']) + + if status['status'] == 'completed': + return status + elif status['status'] == 'failed': + raise Exception(f'Deep research failed. Error: {status.get("error")}') + elif status['status'] != 'processing': + break + + await asyncio.sleep(2) + + return {'success': False, 'error': 'Deep research job terminated unexpectedly'} + + async def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse: + """ + Initiate an asynchronous deep research job without waiting for completion. 
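A minimal sketch of starting a deep research job without waiting, as documented above, assuming a placeholder API key; check_deep_research_status (documented next) reports progress fields such as status and currentDepth:

import asyncio
from firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR_API_KEY")  # placeholder key

async def research_in_background():
    job = await app.async_deep_research(
        "How do vector databases handle filtering?",
        {"maxDepth": 3, "maxUrls": 10},
    )
    status = await app.check_deep_research_status(job["id"])
    print(status["status"], "at depth", status.get("currentDepth"))

asyncio.run(research_in_background())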
+ + Args: + query (str): Research query or topic to investigate + params (Optional[Union[Dict[str, Any], DeepResearchParams]]): See DeepResearchParams model: + Research Settings: + * maxDepth - Maximum research depth (default: 7) + * timeLimit - Time limit in seconds (default: 270) + * maxUrls - Maximum URLs to process (default: 20) + + Returns: + DeepResearchResponse containing: + * success (bool): Whether job started successfully + * id (str): Unique identifier for the job + * error (str, optional): Error message if start failed + + Raises: + ValueError: If job initiation fails + """ + if params is None: + params = {} + + if isinstance(params, dict): + research_params = DeepResearchParams(**params) + else: + research_params = params + + headers = self._prepare_headers() + json_data = {'query': query, **research_params.dict(exclude_none=True)} + + try: + return await self._async_post_request( + f'{self.api_url}/v1/deep-research', + json_data, + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def check_deep_research_status(self, id: str) -> DeepResearchStatusResponse: + """ + Check the status of an asynchronous deep research job. + + Args: + id (str): The ID of the research job + + Returns: + DeepResearchStatusResponse containing: + * success (bool): Whether research completed successfully + * status (str): Current state (processing/completed/failed) + * data (Dict[str, Any], optional): Research findings and analysis + * error (str, optional): Error message if failed + * expiresAt (str): When the research data expires + * currentDepth (int): Current research depth + * maxDepth (int): Maximum research depth + * activities (List[Dict[str, Any]]): Research progress log + * sources (List[Dict[str, Any]]): Discovered sources + * summaries (List[str]): Generated research summaries + + Raises: + ValueError: If status check fails + """ + headers = self._prepare_headers() + try: + return await self._async_get_request( + f'{self.api_url}/v1/deep-research/{id}', + headers + ) + except Exception as e: + raise ValueError(str(e)) + + async def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: + """ + Asynchronously search for content using Firecrawl. + + Args: + query (str): Search query string + params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model: + Search Options: + * limit - Max results (default: 5) + * tbs - Time filter (e.g. "qdr:d") + * filter - Custom result filter + + Localization: + * lang - Language code (default: "en") + * country - Country code (default: "us") + * location - Geo-targeting + + Request Options: + * timeout - Request timeout (ms) + * scrapeOptions - Result scraping config + + Returns: + SearchResponse containing: + * success (bool): Whether search completed successfully + * data (List[FirecrawlDocument]): Search results + * warning (str, optional): Warning message if any + * error (str, optional): Error message if search failed + + Raises: + Exception: If search fails + """ + if params is None: + params = {} + + if isinstance(params, dict): + search_params = SearchParams(query=query, **params) + else: + search_params = params + search_params.query = query + + return await self._async_post_request( + f"{self.api_url}/v1/search", + search_params.dict(exclude_none=True), + {"Authorization": f"Bearer {self.api_key}"} + ) + +class AsyncCrawlWatcher(CrawlWatcher): + """ + Async version of CrawlWatcher that properly handles async operations. 
+ """ + def __init__(self, id: str, app: AsyncFirecrawlApp): + super().__init__(id, app) + + async def connect(self) -> None: + """ + Establishes async WebSocket connection and starts listening for messages. + """ + async with websockets.connect(self.ws_url, extra_headers={"Authorization": f"Bearer {self.app.api_key}"}) as websocket: + await self._listen(websocket) + + async def _listen(self, websocket) -> None: + """ + Listens for incoming WebSocket messages and handles them asynchronously. + + Args: + websocket: The WebSocket connection object + """ + async for message in websocket: + msg = json.loads(message) + await self._handle_message(msg) + + async def _handle_message(self, msg: Dict[str, Any]) -> None: + """ + Handles incoming WebSocket messages based on their type asynchronously. + + Args: + msg (Dict[str, Any]): The message to handle + """ + if msg['type'] == 'done': + self.status = 'completed' + self.dispatch_event('done', {'status': self.status, 'data': self.data, 'id': self.id}) + elif msg['type'] == 'error': + self.status = 'failed' + self.dispatch_event('error', {'status': self.status, 'data': self.data, 'error': msg['error'], 'id': self.id}) + elif msg['type'] == 'catchup': + self.status = msg['data']['status'] + self.data.extend(msg['data'].get('data', [])) + for doc in self.data: + self.dispatch_event('document', {'data': doc, 'id': self.id}) + elif msg['type'] == 'document': + self.data.append(msg['data']) + self.dispatch_event('document', {'data': msg['data'], 'id': self.id}) + + async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None: + """ + Handle errors from async API responses. + """ + try: + error_data = await response.json() + error_message = error_data.get('error', 'No error message provided.') + error_details = error_data.get('details', 'No additional error details provided.') + except: + raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}') + + if response.status == 402: + message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" + elif response.status == 408: + message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" + elif response.status == 409: + message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" + elif response.status == 500: + message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" + else: + message = f"Unexpected error during {action}: Status code {response.status}. 
{error_message} - {error_details}" + + raise aiohttp.ClientError(message) diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt index 5dcd8f6c..360d9e76 100644 --- a/apps/python-sdk/requirements.txt +++ b/apps/python-sdk/requirements.txt @@ -3,4 +3,5 @@ pytest python-dotenv websockets nest-asyncio -pydantic \ No newline at end of file +pydantic +aiohttp \ No newline at end of file From 86f41460e0bb50f009170a11757f27d037103c83 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 13 Mar 2025 17:00:46 -0300 Subject: [PATCH 03/26] removed v0 in example --- apps/python-sdk/example_async.py | 38 -------------------------------- 1 file changed, 38 deletions(-) diff --git a/apps/python-sdk/example_async.py b/apps/python-sdk/example_async.py index 7afe6a70..d5251515 100644 --- a/apps/python-sdk/example_async.py +++ b/apps/python-sdk/example_async.py @@ -73,42 +73,6 @@ async def example_llm_extraction(): print(llm_extraction_result['extract']) - # Define schema to extract contents into using json schema - json_schema = { - "type": "object", - "properties": { - "top": { - "type": "array", - "items": { - "type": "object", - "properties": { - "title": {"type": "string"}, - "points": {"type": "number"}, - "by": {"type": "string"}, - "commentsURL": {"type": "string"} - }, - "required": ["title", "points", "by", "commentsURL"] - }, - "minItems": 5, - "maxItems": 5, - "description": "Top 5 stories on Hacker News" - } - }, - "required": ["top"] - } - - app2 = AsyncFirecrawlApp(api_key="fc-", version="v0") - - llm_extraction_result = await app2.scrape_url('https://news.ycombinator.com', { - 'extractorOptions': { - 'extractionSchema': json_schema, - 'mode': 'llm-extraction' - }, - 'pageOptions':{ - 'onlyMainContent': True - } - }) - async def example_map_and_extract(): # Map a website: map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' }) @@ -153,10 +117,8 @@ async def example_websocket_crawl(): await watcher.connect() async def main(): - # Apply nest_asyncio to allow nested event loops nest_asyncio.apply() - # Run all the examples await example_scrape() await example_batch_scrape() await example_crawl() From e7db5a2d5b19188e05e840e1e2c7a88098f9bde0 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 14 Mar 2025 07:49:30 -0300 Subject: [PATCH 04/26] tomkosms review --- apps/python-sdk/firecrawl/firecrawl.py | 137 +++++++++++++++++++------ 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index e0f8c940..3bc1aa9b 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -108,16 +108,46 @@ class CrawlScrapeOptions(pydantic.BaseModel): blockAds: Optional[bool] = None proxy: Optional[Literal["basic", "stealth"]] = None -class Action(pydantic.BaseModel): - """Action to perform during scraping.""" - type: Literal["wait", "click", "screenshot", "write", "press", "scroll", "scrape", "executeJavascript"] - milliseconds: Optional[int] = None +class WaitAction(pydantic.BaseModel): + """Wait action to perform during scraping.""" + type: Literal["wait"] + milliseconds: int selector: Optional[str] = None + +class ScreenshotAction(pydantic.BaseModel): + """Screenshot action to perform during scraping.""" + type: Literal["screenshot"] fullPage: Optional[bool] = None - text: Optional[str] = None - key: Optional[str] = None - 
direction: Optional[Literal["up", "down"]] = None - script: Optional[str] = None + +class ClickAction(pydantic.BaseModel): + """Click action to perform during scraping.""" + type: Literal["click"] + selector: str + +class WriteAction(pydantic.BaseModel): + """Write action to perform during scraping.""" + type: Literal["write"] + text: str + +class PressAction(pydantic.BaseModel): + """Press action to perform during scraping.""" + type: Literal["press"] + key: str + +class ScrollAction(pydantic.BaseModel): + """Scroll action to perform during scraping.""" + type: Literal["scroll"] + direction: Literal["up", "down"] + selector: Optional[str] = None + +class ScrapeAction(pydantic.BaseModel): + """Scrape action to perform during scraping.""" + type: Literal["scrape"] + +class ExecuteJavascriptAction(pydantic.BaseModel): + """Execute javascript action to perform during scraping.""" + type: Literal["executeJavascript"] + script: str class ExtractConfig(pydantic.BaseModel): """Configuration for extraction.""" @@ -129,7 +159,7 @@ class ScrapeParams(CrawlScrapeOptions): """Parameters for scraping operations.""" extract: Optional[ExtractConfig] = None jsonOptions: Optional[ExtractConfig] = None - actions: Optional[List[Action]] = None + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None class ScrapeResponse(FirecrawlDocument[T], Generic[T]): """Response from scraping operations.""" @@ -240,7 +270,7 @@ class SearchParams(pydantic.BaseModel): location: Optional[str] = None origin: Optional[str] = "api" timeout: Optional[int] = 60000 - scrapeOptions: Optional[Dict[str, Any]] = None + scrapeOptions: Optional[CrawlScrapeOptions] = None class SearchResponse(pydantic.BaseModel): """Response from search operations.""" @@ -295,10 +325,14 @@ class GenerateLLMsTextResponse(pydantic.BaseModel): id: str error: Optional[str] = None +class GenerateLLMsTextStatusResponseData(pydantic.BaseModel): + llmstxt: str + llmsfulltxt: Optional[str] = None + class GenerateLLMsTextStatusResponse(pydantic.BaseModel): """Status response from LLMs.txt generation operations.""" success: bool = True - data: Optional[Dict[str, str]] = None # {llmstxt: str, llmsfulltxt?: str} + data: Optional[GenerateLLMsTextStatusResponseData] = None status: Literal["processing", "completed", "failed"] error: Optional[str] = None expiresAt: str @@ -322,13 +356,16 @@ class FirecrawlApp: logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}") - def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> ScrapeResponse[Any]: + def scrape_url( + self, + url: str, + params: Optional[ScrapeParams] = None) -> ScrapeResponse[Any]: """ Scrape and extract content from a URL. Args: url (str): Target URL to scrape - params (Optional[Dict[str, Any]]): See ScrapeParams model for configuration: + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: Content Options: * formats - Content types to retrieve (markdown/html/etc) * includeTags - HTML tags to include @@ -410,7 +447,10 @@ class FirecrawlApp: else: self._handle_error(response, 'scrape URL') - def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: + def search( + self, + query: str, + params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: """ Search for content using Firecrawl. 
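# Illustrative sketch (not from the patch): one way the typed action models above
# could be combined with ScrapeParams and passed to scrape_url as it stands at this
# point in the series. The import path, the API key, the selectors, and the
# .dict(exclude_none=True) serialization step are assumptions for illustration only.
from firecrawl.firecrawl import (
    FirecrawlApp,      # sync client
    ScrapeParams,
    WaitAction,
    ClickAction,
    ScrollAction,
)

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
params = ScrapeParams(
    formats=["markdown", "links"],
    actions=[
        WaitAction(type="wait", milliseconds=1000),        # let the page settle
        ClickAction(type="click", selector="#load-more"),  # placeholder selector
        ScrollAction(type="scroll", direction="down"),
    ],
)
# At this stage scrape_url still consumes a plain mapping internally,
# so serialize the model before passing it.
result = app.scrape_url("https://example.com", params.dict(exclude_none=True))
print(result)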
@@ -520,14 +560,18 @@ class FirecrawlApp: self._handle_error(response, 'start crawl job') - def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> CrawlResponse: + def async_crawl_url( + self, + url: str, + params: Optional[CrawlParams] = None, + idempotency_key: Optional[str] = None) -> CrawlResponse: """ Start an asynchronous crawl job. Args: url (str): Target URL to start crawling from - params (Optional[Dict[str, Any]]): See CrawlParams model: + params (Optional[CrawlParams]): See CrawlParams model: URL Discovery: * includePaths - Patterns of URLs to include @@ -754,7 +798,10 @@ class FirecrawlApp: else: raise Exception("Crawl job failed to start") - def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> MapResponse: + def map_url( + self, + url: str, + params: Optional[MapParams] = None) -> MapResponse: """ Map and discover links from a URL. @@ -1891,7 +1938,7 @@ class AsyncFirecrawlApp(FirecrawlApp): if response.status == 502: await asyncio.sleep(backoff_factor * (2 ** attempt)) continue - if response.status != 200: + if response.status >= 300: await self._handle_error(response, "make POST request") return await response.json() except aiohttp.ClientError as e: @@ -1930,7 +1977,7 @@ class AsyncFirecrawlApp(FirecrawlApp): if response.status == 502: await asyncio.sleep(backoff_factor * (2 ** attempt)) continue - if response.status != 200: + if response.status >= 300: # Accept any 2xx status code as success await self._handle_error(response, "make GET request") return await response.json() except aiohttp.ClientError as e: @@ -2060,13 +2107,16 @@ class AsyncFirecrawlApp(FirecrawlApp): else: raise Exception("Batch scrape job failed to start") - async def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> ScrapeResponse[Any]: + async def scrape_url( + self, + url: str, + params: Optional[ScrapeParams] = None) -> ScrapeResponse[Any]: """ Asynchronously scrape and extract content from a URL. Args: url (str): Target URL to scrape - params (Optional[Dict[str, Any]]): See ScrapeParams model for configuration: + params (Optional[ScrapeParams]): See ScrapeParams model for configuration: Content Options: * formats - Content types to retrieve (markdown/html/etc) * includeTags - HTML tags to include @@ -2122,7 +2172,10 @@ class AsyncFirecrawlApp(FirecrawlApp): else: raise Exception(f'Failed to scrape URL. Error: {response}') - async def batch_scrape_urls(self, urls: List[str], params: Optional[ScrapeParams] = None) -> BatchScrapeStatusResponse: + async def batch_scrape_urls( + self, + urls: List[str], + params: Optional[ScrapeParams] = None) -> BatchScrapeStatusResponse: """ Asynchronously scrape multiple URLs and monitor until completion. @@ -2282,13 +2335,17 @@ class AsyncFirecrawlApp(FirecrawlApp): else: raise Exception(f'Failed to start crawl. Error: {response.get("error")}') - async def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> CrawlResponse: + async def async_crawl_url( + self, + url: str, + params: Optional[CrawlParams] = None, + idempotency_key: Optional[str] = None) -> CrawlResponse: """ Initiate an asynchronous crawl job without waiting for completion. 
Args: url (str): Target URL to start crawling from - params (Optional[Dict[str, Any]]): See CrawlParams model: + params (Optional[CrawlParams]): See CrawlParams model: URL Discovery: * includePaths - Patterns of URLs to include * excludePaths - Patterns of URLs to exclude @@ -2442,13 +2499,16 @@ class AsyncFirecrawlApp(FirecrawlApp): else: raise Exception(f'Job failed or was stopped. Status: {status_data["status"]}') - async def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> MapResponse: + async def map_url( + self, + url: str, + params: Optional[MapParams] = None) -> MapResponse: """ Asynchronously map and discover links from a URL. Args: url (str): Target URL to map - params (Optional[Dict[str, Any]]): See MapParams model: + params (Optional[MapParams]): See MapParams model: Discovery Options: * search - Filter pattern for URLs * ignoreSitemap - Skip sitemap.xml @@ -2486,7 +2546,10 @@ class AsyncFirecrawlApp(FirecrawlApp): else: raise Exception(f'Failed to map URL. Error: {response}') - async def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> ExtractResponse[Any]: + async def extract( + self, + urls: List[str], + params: Optional[ExtractParams] = None) -> ExtractResponse[Any]: """ Asynchronously extract structured information from URLs. @@ -2792,7 +2855,10 @@ class AsyncFirecrawlApp(FirecrawlApp): except Exception as e: raise ValueError(str(e)) - async def generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: + async def generate_llms_text( + self, + url: str, + params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: """ Generate LLMs.txt for a given URL and monitor until completion. @@ -2843,7 +2909,10 @@ class AsyncFirecrawlApp(FirecrawlApp): return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'} - async def async_generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse: + async def async_generate_llms_text( + self, + url: str, + params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse: """ Initiate an asynchronous LLMs.txt generation job without waiting for completion. @@ -2996,7 +3065,10 @@ class AsyncFirecrawlApp(FirecrawlApp): return {'success': False, 'error': 'Deep research job terminated unexpectedly'} - async def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse: + async def async_deep_research( + self, + query: str, + params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse: """ Initiate an asynchronous deep research job without waiting for completion. @@ -3069,7 +3141,10 @@ class AsyncFirecrawlApp(FirecrawlApp): except Exception as e: raise ValueError(str(e)) - async def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: + async def search( + self, + query: str, + params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: """ Asynchronously search for content using Firecrawl. 
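# Illustrative async sketch (not from the patch): calling the typed async client.
# Assumptions: AsyncFirecrawlApp and SearchParams are importable from
# firecrawl.firecrawl, the key and query are placeholders, and the raw JSON
# payload is what search returns at this point in the series.
import asyncio

from firecrawl.firecrawl import AsyncFirecrawlApp, SearchParams

async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    results = await app.search(
        "firecrawl python sdk",
        params=SearchParams(query="firecrawl python sdk", limit=3),
    )
    # Each result is a document dict with url/title/description fields.
    for doc in results.get("data", []):
        print(doc.get("url"), "->", doc.get("title"))

asyncio.run(main())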
From 97695dd55b987b12641739da20872e7e92e15eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= Date: Fri, 14 Mar 2025 19:53:57 +0000 Subject: [PATCH 05/26] refator: dry request and error handling --- apps/python-sdk/firecrawl/firecrawl.py | 209 +++++++++++++++---------- 1 file changed, 123 insertions(+), 86 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 3bc1aa9b..d62312c6 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1624,21 +1624,35 @@ class FirecrawlApp: except: raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response) - - if response.status_code == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status_code == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status_code == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status_code == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message} - {error_details}" + message = self._get_error_message(response.status_code, action, error_message, error_details) # Raise an HTTPError with the custom message and attach the response raise requests.exceptions.HTTPError(message, response=response) + def _get_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code. + + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + if status_code == 402: + return f"Payment Required: Failed to {action}. {error_message} - {error_details}" + elif status_code == 408: + return f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" + elif status_code == 409: + return f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" + elif status_code == 500: + return f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" + else: + return f"Unexpected error during {action}: Status code {status_code}. {error_message} - {error_details}" + def deep_research( self, query: str, @@ -1905,86 +1919,96 @@ class AsyncFirecrawlApp(FirecrawlApp): Asynchronous version of FirecrawlApp that implements async methods using aiohttp. Provides non-blocking alternatives to all FirecrawlApp operations. """ - - async def _async_post_request( + + async def _async_request( self, + method: str, url: str, - data: Dict[str, Any], headers: Dict[str, str], + data: Optional[Dict[str, Any]] = None, retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: """ + Generic async request method with exponential backoff retry logic. + + Args: + method (str): The HTTP method to use (e.g., "GET" or "POST"). + url (str): The URL to send the request to. + headers (Dict[str, str]): Headers to include in the request. 
+ data (Optional[Dict[str, Any]]): The JSON data to include in the request body (only for POST requests). + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). + + Returns: + Dict[str, Any]: The parsed JSON response from the server. + + Raises: + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. + """ + async with aiohttp.ClientSession() as session: + for attempt in range(retries): + try: + async with session.request( + method=method, url=url, headers=headers, json=data + ) as response: + if response.status == 502: + await asyncio.sleep(backoff_factor * (2 ** attempt)) + continue + if response.status >= 300: + await self._handle_error(response, f"make {method} request") + return await response.json() + except aiohttp.ClientError as e: + if attempt == retries - 1: + raise e + await asyncio.sleep(backoff_factor * (2 ** attempt)) + raise Exception("Max retries exceeded") + + async def _async_post_request( + self, url: str, data: Dict[str, Any], headers: Dict[str, str], + retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: + """ Make an async POST request with exponential backoff retry logic. Args: - url (str): The URL to send the POST request to - data (Dict[str, Any]): The JSON data to include in the request body - headers (Dict[str, str]): Headers to include in the request - retries (int): Maximum number of retry attempts (default: 3) - backoff_factor (float): Factor to calculate delay between retries (default: 0.5) - Delay will be backoff_factor * (2 ** retry_count) + url (str): The URL to send the POST request to. + data (Dict[str, Any]): The JSON data to include in the request body. + headers (Dict[str, str]): Headers to include in the request. + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). Returns: - Dict[str, Any]: The parsed JSON response from the server + Dict[str, Any]: The parsed JSON response from the server. Raises: - aiohttp.ClientError: If the request fails after all retries - Exception: If max retries are exceeded or other errors occur + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. """ - async with aiohttp.ClientSession() as session: - for attempt in range(retries): - try: - async with session.post(url, headers=headers, json=data) as response: - if response.status == 502: - await asyncio.sleep(backoff_factor * (2 ** attempt)) - continue - if response.status >= 300: - await self._handle_error(response, "make POST request") - return await response.json() - except aiohttp.ClientError as e: - if attempt == retries - 1: - raise e - await asyncio.sleep(backoff_factor * (2 ** attempt)) - raise Exception("Max retries exceeded") + return await self._async_request("POST", url, headers, data, retries, backoff_factor) async def _async_get_request( - self, - url: str, - headers: Dict[str, str], - retries: int = 3, - backoff_factor: float = 0.5) -> Dict[str, Any]: + self, url: str, headers: Dict[str, str], + retries: int = 3, backoff_factor: float = 0.5) -> Dict[str, Any]: """ Make an async GET request with exponential backoff retry logic. 
Args: - url (str): The URL to send the GET request to - headers (Dict[str, str]): Headers to include in the request - retries (int): Maximum number of retry attempts (default: 3) - backoff_factor (float): Factor to calculate delay between retries (default: 0.5) - Delay will be backoff_factor * (2 ** retry_count) + url (str): The URL to send the GET request to. + headers (Dict[str, str]): Headers to include in the request. + retries (int): Maximum number of retry attempts (default: 3). + backoff_factor (float): Factor to calculate delay between retries (default: 0.5). + Delay will be backoff_factor * (2 ** retry_count). Returns: - Dict[str, Any]: The parsed JSON response from the server + Dict[str, Any]: The parsed JSON response from the server. Raises: - aiohttp.ClientError: If the request fails after all retries - Exception: If max retries are exceeded or other errors occur + aiohttp.ClientError: If the request fails after all retries. + Exception: If max retries are exceeded or other errors occur. """ - async with aiohttp.ClientSession() as session: - for attempt in range(retries): - try: - async with session.get(url, headers=headers) as response: - if response.status == 502: - await asyncio.sleep(backoff_factor * (2 ** attempt)) - continue - if response.status >= 300: # Accept any 2xx status code as success - await self._handle_error(response, "make GET request") - return await response.json() - except aiohttp.ClientError as e: - if attempt == retries - 1: - raise e - await asyncio.sleep(backoff_factor * (2 ** attempt)) - raise Exception("Max retries exceeded") + return await self._async_request("GET", url, headers, None, retries, backoff_factor) async def _handle_error(self, response: aiohttp.ClientResponse, action: str) -> None: """ @@ -2009,19 +2033,25 @@ class AsyncFirecrawlApp(FirecrawlApp): except: raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status}') - if response.status == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}" + message = await self._get_async_error_message(response.status, action, error_message, error_details) raise aiohttp.ClientError(message) + async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code for async operations. + + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + return self._get_error_message(status_code, action, error_message, error_details) + async def crawl_url_and_watch( self, url: str, @@ -3248,15 +3278,22 @@ class AsyncCrawlWatcher(CrawlWatcher): except: raise aiohttp.ClientError(f'Failed to parse Firecrawl error response as JSON. 
Status code: {response.status}') - if response.status == 402: - message = f"Payment Required: Failed to {action}. {error_message} - {error_details}" - elif response.status == 408: - message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}" - elif response.status == 409: - message = f"Conflict: Failed to {action} due to a conflict. {error_message} - {error_details}" - elif response.status == 500: - message = f"Internal Server Error: Failed to {action}. {error_message} - {error_details}" - else: - message = f"Unexpected error during {action}: Status code {response.status}. {error_message} - {error_details}" + # Use the app's method to get the error message + message = await self.app._get_async_error_message(response.status, action, error_message, error_details) raise aiohttp.ClientError(message) + + async def _get_async_error_message(self, status_code: int, action: str, error_message: str, error_details: str) -> str: + """ + Generate a standardized error message based on HTTP status code for async operations. + + Args: + status_code (int): The HTTP status code from the response + action (str): Description of the action that was being performed + error_message (str): The error message from the API response + error_details (str): Additional error details from the API response + + Returns: + str: A formatted error message + """ + return self._get_error_message(status_code, action, error_message, error_details) From cc255d488eefd953d14420cb7e7ef3b4d1cf4911 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 14 Mar 2025 18:27:42 -0300 Subject: [PATCH 06/26] fixed websocket params --- apps/python-sdk/firecrawl/firecrawl.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d62312c6..990599cc 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1855,7 +1855,10 @@ class CrawlWatcher: """ Establishes WebSocket connection and starts listening for messages. """ - async with websockets.connect(self.ws_url, extra_headers={"Authorization": f"Bearer {self.app.api_key}"}) as websocket: + async with websockets.connect( + self.ws_url, + additional_headers=[("Authorization", f"Bearer {self.app.api_key}")] + ) as websocket: await self._listen(websocket) async def _listen(self, websocket) -> None: @@ -3231,7 +3234,10 @@ class AsyncCrawlWatcher(CrawlWatcher): """ Establishes async WebSocket connection and starts listening for messages. 
""" - async with websockets.connect(self.ws_url, extra_headers={"Authorization": f"Bearer {self.app.api_key}"}) as websocket: + async with websockets.connect( + self.ws_url, + additional_headers=[("Authorization", f"Bearer {self.app.api_key}")] + ) as websocket: await self._listen(websocket) async def _listen(self, websocket) -> None: From 4f984d3fded33517ec52694ab2b22fcb6cc42e52 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:45:51 -0300 Subject: [PATCH 07/26] added origin to requests --- apps/api/src/controllers/v1/extract.ts | 4 +- apps/js-sdk/firecrawl/src/index.ts | 47 +++++++++++++++++------- apps/python-sdk/firecrawl/firecrawl.py | 51 ++++++++++++++++++++++---- 3 files changed, 79 insertions(+), 23 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index b18117f5..31c848b7 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -60,7 +60,9 @@ export async function extractController( if ( (await getTeamIdSyncB(req.auth.team_id)) && req.body.origin !== "api-sdk" && - req.body.origin !== "website" + req.body.origin !== "website" && + !req.body.origin.startsWith("python-sdk@") && + !req.body.origin.startsWith("js-sdk@") ) { return await oldExtract(req, res, extractId); } diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index ab09432e..8e4eca61 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -474,11 +474,26 @@ export interface GenerateLLMsTextStatusResponse { export default class FirecrawlApp { public apiKey: string; public apiUrl: string; - + public version: string = "1.19.1"; + private isCloudService(url: string): boolean { return url.includes('api.firecrawl.dev'); } + private async getVersion(): Promise { + try { + const packageJson = await import('../package.json', { assert: { type: 'json' } }); + return packageJson.default.version; + } catch (error) { + console.error("Error getting version:", error); + return "1.19.1"; + } + } + + private async init() { + this.version = await this.getVersion(); + } + /** * Initializes a new instance of the FirecrawlApp class. * @param config - Configuration options for the FirecrawlApp instance. @@ -492,6 +507,7 @@ export default class FirecrawlApp { this.apiKey = apiKey || ''; this.apiUrl = baseUrl; + this.init(); } /** @@ -508,7 +524,7 @@ export default class FirecrawlApp { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; @@ -590,7 +606,7 @@ export default class FirecrawlApp { lang: params?.lang ?? "en", country: params?.country ?? "us", location: params?.location, - origin: params?.origin ?? "api", + origin: `js-sdk@${this.version}`, timeout: params?.timeout ?? 60000, scrapeOptions: params?.scrapeOptions ?? 
{ formats: [] }, }; @@ -662,7 +678,7 @@ export default class FirecrawlApp { idempotencyKey?: string ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/crawl`, @@ -691,7 +707,7 @@ export default class FirecrawlApp { idempotencyKey?: string ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/crawl`, @@ -867,7 +883,7 @@ export default class FirecrawlApp { */ async mapUrl(url: string, params?: MapParams): Promise { const headers = this.prepareHeaders(); - let jsonData: { url: string } & MapParams = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( @@ -904,7 +920,7 @@ export default class FirecrawlApp { ignoreInvalidURLs?: boolean, ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params }; + let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; @@ -969,7 +985,7 @@ export default class FirecrawlApp { ignoreInvalidURLs?: boolean, ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) }; + let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, @@ -1143,7 +1159,7 @@ export default class FirecrawlApp { try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/extract`, - { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" }, + { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); @@ -1211,7 +1227,7 @@ export default class FirecrawlApp { try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/extract`, - { ...jsonData, schema: jsonSchema }, + { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); @@ -1497,10 +1513,11 @@ export default class FirecrawlApp { */ async asyncDeepResearch(query: string, params: DeepResearchParams): Promise { const headers = this.prepareHeaders(); + let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( - `${this.apiUrl}/v1/deep-research`, - { query, ...params }, + this.apiUrl + `/v1/deep-research`, + jsonData, headers ); @@ -1632,9 +1649,10 @@ export default class FirecrawlApp { async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise { const headers = this.prepareHeaders(); try { + let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` }; const response: AxiosResponse = await this.postRequest( `${this.apiUrl}/v1/deep-research`, - { topic, ...params }, + jsonData, headers ); @@ -1744,10 +1762,11 @@ export default class FirecrawlApp { */ async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise { const headers = this.prepareHeaders(); + let jsonData: any = { url, 
...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( `${this.apiUrl}/v1/llmstxt`, - { url, ...params }, + jsonData, headers ); diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 990599cc..adedce9c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -15,6 +15,7 @@ import time from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic import json from datetime import datetime +import re import requests import pydantic @@ -22,6 +23,20 @@ import websockets import aiohttp import asyncio +def get_version(): + try: + from pathlib import Path + package_path = os.path.dirname(__file__) + version_file = Path(os.path.join(package_path, '__init__.py')).read_text() + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) + if version_match: + return version_match.group(1).strip() + except Exception: + print("Failed to get version from __init__.py") + return None + +version = get_version() + logger : logging.Logger = logging.getLogger("firecrawl") T = TypeVar('T') @@ -424,6 +439,7 @@ class FirecrawlApp: if key not in ['jsonOptions']: scrape_params[key] = value + scrape_params['origin'] = f"python-sdk@{version}" endpoint = f'/v1/scrape' # Make the POST request with the prepared headers and JSON data @@ -489,10 +505,13 @@ class FirecrawlApp: search_params = params search_params.query = query + params_dict = search_params.dict(exclude_none=True) + params_dict['origin'] = f"python-sdk@{version}" + response = requests.post( f"{self.api_url}/v1/search", headers={"Authorization": f"Bearer {self.api_key}"}, - json=search_params.dict(exclude_none=True) + json=params_dict ) if response.status_code != 200: @@ -548,6 +567,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -609,6 +629,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -835,6 +856,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" # Make the POST request with the prepared headers and JSON data response = requests.post( @@ -897,6 +919,7 @@ class FirecrawlApp: json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -953,6 +976,7 @@ class FirecrawlApp: json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -1153,7 +1177,7 @@ class FirecrawlApp: 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'showSources': params.get('show_sources', params.get('showSources', False)), 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{get_version()}' } # Only add prompt and systemPrompt if they exist @@ -1284,7 +1308,7 @@ class FirecrawlApp: **jsonData, 'allowExternalLinks': 
params.get('allow_external_links', False) if params else False, 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } try: @@ -1387,6 +1411,7 @@ class FirecrawlApp: headers = self._prepare_headers() json_data = {'url': url, **generation_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers) @@ -1770,6 +1795,7 @@ class FirecrawlApp: headers = self._prepare_headers() json_data = {'query': query, **research_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers) @@ -2178,7 +2204,7 @@ class AsyncFirecrawlApp(FirecrawlApp): Exception: If scraping fails """ headers = self._prepare_headers() - scrape_params = {'url': url} + scrape_params = {'url': url, 'origin': f'python-sdk@{version}'} if params: extract = params.get('extract', {}) @@ -2245,6 +2271,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/batch/scrape' response = await self._async_post_request( @@ -2301,6 +2328,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/batch/scrape' return await self._async_post_request( @@ -2355,6 +2383,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/crawl' response = await self._async_post_request( @@ -2413,6 +2442,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/crawl' return await self._async_post_request( @@ -2564,6 +2594,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/map' response = await self._async_post_request( @@ -2628,7 +2659,7 @@ class AsyncFirecrawlApp(FirecrawlApp): 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'showSources': params.get('show_sources', params.get('showSources', False)), 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } if params.get('prompt'): @@ -2876,7 +2907,7 @@ class AsyncFirecrawlApp(FirecrawlApp): **jsonData, 'allowExternalLinks': params.get('allow_external_links', False) if params else False, 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } try: @@ -2975,6 +3006,7 @@ class AsyncFirecrawlApp(FirecrawlApp): headers = self._prepare_headers() json_data = {'url': url, **generation_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: return await self._async_post_request( @@ -3132,7 +3164,7 @@ class AsyncFirecrawlApp(FirecrawlApp): headers = self._prepare_headers() json_data = {'query': query, **research_params.dict(exclude_none=True)} - + json_data['origin'] = f"python-sdk@{version}" try: return await self._async_post_request( f'{self.api_url}/v1/deep-research', @@ -3217,9 +3249,12 @@ class AsyncFirecrawlApp(FirecrawlApp): search_params = params search_params.query = query + search_params_dict = search_params.dict(exclude_none=True) + search_params_dict['origin'] = f"python-sdk@{version}" + return await 
self._async_post_request( f"{self.api_url}/v1/search", - search_params.dict(exclude_none=True), + search_params_dict, {"Authorization": f"Bearer {self.api_key}"} ) From d9780412f539579227b042b1826fca38a7efaf06 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 17 Apr 2025 23:08:33 -0700 Subject: [PATCH 08/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 8ca94fa6..d168a6b5 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -364,7 +364,6 @@ class ChangeTrackingData(pydantic.BaseModel): diff: Optional[Dict[str, Any]] = None json: Optional[Any] = None -class FirecrawlApp: class SearchResponse(pydantic.BaseModel): """ Response from the search operation. @@ -399,6 +398,9 @@ class FirecrawlApp: data: Optional[Any] = None error: Optional[str] = None +class FirecrawlApp: + + def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: """ Initialize the FirecrawlApp instance with API key, API URL. From f48937a55dc733d74e3326d4082c8d57e1e6a770 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 17 Apr 2025 23:17:00 -0700 Subject: [PATCH 09/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d168a6b5..8fead0e6 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -16,12 +16,20 @@ from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, import json from datetime import datetime import re - +import warnings import requests import pydantic import websockets import aiohttp import asyncio +from pydantic import Field + +# Suppress Pydantic warnings about attribute shadowing +warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"") +warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"") +warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"") +warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"") + def get_version(): try: From 22cfdd6ae3bb396469d48dcc93564efae0254e71 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 17 Apr 2025 23:31:28 -0700 Subject: [PATCH 10/26] added agent options types --- apps/python-sdk/firecrawl/firecrawl.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 8ca94fa6..d7abf336 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -76,6 +76,15 @@ class FirecrawlDocumentMetadata(pydantic.BaseModel): statusCode: Optional[int] = None error: Optional[str] = None +class AgentOptions(pydantic.BaseModel): + """Configuration for the agent.""" + model: Literal["FIRE-1"] = "FIRE-1" + prompt: Optional[str] = None + +class AgentOptionsExtract(pydantic.BaseModel): + """Configuration for the agent in extract operations.""" + model: Literal["FIRE-1"] = "FIRE-1" + class 
ActionsResult(pydantic.BaseModel): """Result of actions performed during scraping.""" screenshots: List[str] @@ -164,17 +173,24 @@ class ExecuteJavascriptAction(pydantic.BaseModel): type: Literal["executeJavascript"] script: str + +class ExtractAgent(pydantic.BaseModel): + """Configuration for the agent in extract operations.""" + model: Literal["FIRE-1"] = "FIRE-1" + class ExtractConfig(pydantic.BaseModel): """Configuration for extraction.""" prompt: Optional[str] = None schema: Optional[Any] = None systemPrompt: Optional[str] = None + agent: Optional[ExtractAgent] = None class ScrapeParams(CrawlScrapeOptions): """Parameters for scraping operations.""" extract: Optional[ExtractConfig] = None jsonOptions: Optional[ExtractConfig] = None actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None + agent: Optional[AgentOptions] = None class ScrapeResponse(FirecrawlDocument[T], Generic[T]): """Response from scraping operations.""" @@ -363,7 +379,7 @@ class ChangeTrackingData(pydantic.BaseModel): visibility: str # "visible" | "hidden" diff: Optional[Dict[str, Any]] = None json: Optional[Any] = None - + class FirecrawlApp: class SearchResponse(pydantic.BaseModel): """ From 8eb4e1a96a4ad213ac1d270ee776e58ef42d9ea1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 17 Apr 2025 23:50:56 -0700 Subject: [PATCH 11/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 131 ++++++++++++++----------- 1 file changed, 74 insertions(+), 57 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 8fead0e6..b23b60c9 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -115,7 +115,7 @@ class WebhookConfig(pydantic.BaseModel): metadata: Optional[Dict[str, str]] = None events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None -class CrawlScrapeOptions(pydantic.BaseModel): +class CommonOptions(pydantic.BaseModel): """Parameters for scraping operations.""" formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None headers: Optional[Dict[str, str]] = None @@ -178,7 +178,7 @@ class ExtractConfig(pydantic.BaseModel): schema: Optional[Any] = None systemPrompt: Optional[str] = None -class ScrapeParams(CrawlScrapeOptions): +class ScrapeParams(CommonOptions): """Parameters for scraping operations.""" extract: Optional[ExtractConfig] = None jsonOptions: Optional[ExtractConfig] = None @@ -219,7 +219,7 @@ class CrawlParams(pydantic.BaseModel): allowBackwardLinks: Optional[bool] = None allowExternalLinks: Optional[bool] = None ignoreSitemap: Optional[bool] = None - scrapeOptions: Optional[CrawlScrapeOptions] = None + scrapeOptions: Optional[CommonOptions] = None webhook: Optional[Union[str, WebhookConfig]] = None deduplicateSimilarURLs: Optional[bool] = None ignoreQueryParameters: Optional[bool] = None @@ -273,7 +273,7 @@ class ExtractParams(pydantic.BaseModel): includeSubdomains: Optional[bool] = None origin: Optional[str] = None showSources: Optional[bool] = None - scrapeOptions: Optional[CrawlScrapeOptions] = None + scrapeOptions: Optional[CommonOptions] = None class ExtractResponse(pydantic.BaseModel, Generic[T]): """Response from extract operations.""" @@ -293,7 +293,7 @@ class SearchParams(pydantic.BaseModel): location: Optional[str] = None origin: Optional[str] = "api" timeout: Optional[int] = 
60000 - scrapeOptions: Optional[CrawlScrapeOptions] = None + scrapeOptions: Optional[CommonOptions] = None class SearchResponse(pydantic.BaseModel): """Response from search operations.""" @@ -430,7 +430,21 @@ class FirecrawlApp: def scrape_url( self, url: str, - params: Optional[ScrapeParams] = None) -> ScrapeResponse[Any]: + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] = None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]: """ Scrape and extract content from a URL. @@ -464,66 +478,69 @@ class FirecrawlApp: Raises: Exception: If scraping fails """ - headers = self._prepare_headers() - # Prepare the base scrape parameters with the URL - scrape_params = {'url': url} + # Build scrape parameters + scrape_params = { + 'url': url, + 'origin': f"python-sdk@{version}" + } - # If there are additional params, process them - if params: - # Handle extract (for v1) - extract = params.get('extract', {}) - if extract: - if 'schema' in extract and hasattr(extract['schema'], 'schema'): - extract['schema'] = extract['schema'].schema() - scrape_params['extract'] = extract + # Add optional parameters if provided + if formats: + scrape_params['formats'] = formats + if include_tags: + scrape_params['includeTags'] = include_tags + if exclude_tags: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for: + scrape_params['waitFor'] = wait_for + if timeout: + scrape_params['timeout'] = timeout + if location: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy: + scrape_params['proxy'] = proxy + if extract: + if hasattr(extract.schema, 'schema'): + extract.schema = extract.schema.schema() + scrape_params['extract'] = extract.dict(exclude_none=True) + if json_options: + if hasattr(json_options.schema, 'schema'): + json_options.schema = json_options.schema.schema() + scrape_params['jsonOptions'] = json_options.dict(exclude_none=True) + if actions: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] - # Include any other params directly at the top level of scrape_params - for key, value in params.items(): - if key not in ['extract']: - scrape_params[key] = value - - json = params.get("jsonOptions", {}) - if json: - if 'schema' in json and hasattr(json['schema'], 'schema'): - json['schema'] = json['schema'].schema() - scrape_params['jsonOptions'] = json - - change_tracking = 
params.get("changeTrackingOptions", {}) - if change_tracking: - scrape_params['changeTrackingOptions'] = change_tracking - - # Include any other params directly at the top level of scrape_params - for key, value in params.items(): - if key not in ['jsonOptions', 'changeTrackingOptions', 'agent']: - scrape_params[key] = value - - agent = params.get('agent') - if agent: - scrape_params['agent'] = agent - - scrape_params['origin'] = f"python-sdk@{version}" - - endpoint = f'/v1/scrape' - # Make the POST request with the prepared headers and JSON data + # Make request response = requests.post( - f'{self.api_url}{endpoint}', + f'{self.api_url}/v1/scrape', headers=headers, json=scrape_params, - timeout=(scrape_params["timeout"] + 5000 if "timeout" in scrape_params else None), + timeout=(timeout + 5000 if timeout else None) ) + if response.status_code == 200: try: - response = response.json() - except: - raise Exception(f'Failed to parse Firecrawl response as JSON.') - if response['success'] and 'data' in response: - return response['data'] - elif "error" in response: - raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - else: - raise Exception(f'Failed to scrape URL. Error: {response}') + response_json = response.json() + if response_json.get('success') and 'data' in response_json: + return ScrapeResponse(**response_json['data']) + elif "error" in response_json: + raise Exception(f'Failed to scrape URL. Error: {response_json["error"]}') + else: + raise Exception(f'Failed to scrape URL. Error: {response_json}') + except ValueError: + raise Exception('Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, 'scrape URL') @@ -1690,7 +1707,7 @@ class FirecrawlApp: raise Exception(f'Failed to parse Firecrawl response as JSON.') data.extend(status_data.get('data', [])) status_data['data'] = data - return status_data + return CrawlStatusResponse(**status_data) else: raise Exception('Crawl job completed but no data was returned') elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting', 'scraping']: From 85247991bcc1703ac8690951710c9a0470b90a50 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 18 Apr 2025 00:00:18 -0700 Subject: [PATCH 12/26] generic --- apps/python-sdk/firecrawl/firecrawl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index e7cd45e2..015629c6 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -411,12 +411,12 @@ class ExtractParams(pydantic.BaseModel): show_sources: Optional[bool] = False agent: Optional[Dict[str, Any]] = None -class ExtractResponse(pydantic.BaseModel): +class ExtractResponse(pydantic.BaseModel, Generic[T]): """ Response from the extract operation. 
""" success: bool - data: Optional[Any] = None + data: Optional[T] = None error: Optional[str] = None class FirecrawlApp: From 8c5509cbb4c2e2be6538868c1377451fc631da00 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 00:26:00 -0700 Subject: [PATCH 13/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 107 ++++++++++++++++--------- 1 file changed, 71 insertions(+), 36 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index b6f77661..d56f951c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -558,61 +558,96 @@ class FirecrawlApp: def search( self, query: str, - params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: + limit: Optional[int] = None, + tbs: Optional[str] = None, + filter: Optional[str] = None, + lang: Optional[str] = None, + country: Optional[str] = None, + location: Optional[str] = None, + timeout: Optional[int] = None, + scrape_options: Optional[CommonOptions] = None, + params: Optional[Union[Dict[str, Any], SearchParams]] = None, + **kwargs) -> SearchResponse: """ Search for content using Firecrawl. Args: - query (str): Search query string - - params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model: - - Search Options: - * limit - Max results (default: 5) - * tbs - Time filter (e.g. "qdr:d") - * filter - Custom result filter - - Localization: - * lang - Language code (default: "en") - * country - Country code (default: "us") - * location - Geo-targeting - - Request Options: - * timeout - Request timeout (ms) - * scrapeOptions - Result scraping config, check ScrapeParams model for more details + query (str): Search query string + limit (Optional[int]): Max results (default: 5) + tbs (Optional[str]): Time filter (e.g. 
"qdr:d") + filter (Optional[str]): Custom result filter + lang (Optional[str]): Language code (default: "en") + country (Optional[str]): Country code (default: "us") + location (Optional[str]): Geo-targeting + timeout (Optional[int]): Request timeout in milliseconds + scrape_options (Optional[CommonOptions]): Result scraping configuration + params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters + **kwargs: Additional keyword arguments for future compatibility Returns: - SearchResponse - + SearchResponse: Response containing: + * success (bool): Whether request succeeded + * data (List[FirecrawlDocument]): Search results + * warning (Optional[str]): Warning message if any + * error (Optional[str]): Error message if any Raises: - Exception: If search fails + Exception: If search fails or response cannot be parsed """ - if params is None: - params = {} + # Build search parameters + search_params = {} + if params: + if isinstance(params, dict): + search_params.update(params) + else: + search_params.update(params.dict(exclude_none=True)) - if isinstance(params, dict): - search_params = SearchParams(query=query, **params) - else: - search_params = params - search_params.query = query + # Add individual parameters + if limit is not None: + search_params['limit'] = limit + if tbs is not None: + search_params['tbs'] = tbs + if filter is not None: + search_params['filter'] = filter + if lang is not None: + search_params['lang'] = lang + if country is not None: + search_params['country'] = country + if location is not None: + search_params['location'] = location + if timeout is not None: + search_params['timeout'] = timeout + if scrape_options is not None: + search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True) + + # Add any additional kwargs + search_params.update(kwargs) - params_dict = search_params.dict(exclude_none=True) + # Create final params object + final_params = SearchParams(query=query, **search_params) + params_dict = final_params.dict(exclude_none=True) params_dict['origin'] = f"python-sdk@{version}" + # Make request response = requests.post( f"{self.api_url}/v1/search", headers={"Authorization": f"Bearer {self.api_key}"}, json=params_dict ) - if response.status_code != 200: - raise Exception(f"Request failed with status code {response.status_code}") - - try: - return response.json() - except: - raise Exception(f'Failed to parse Firecrawl response as JSON.') + if response.status_code == 200: + try: + response_json = response.json() + if response_json.get('success') and 'data' in response_json: + return SearchResponse(**response_json) + elif "error" in response_json: + raise Exception(f'Search failed. Error: {response_json["error"]}') + else: + raise Exception(f'Search failed. 
Error: {response_json}') + except ValueError: + raise Exception('Failed to parse Firecrawl response as JSON.') + else: + self._handle_error(response, 'search') def crawl_url(self, url: str, params: Optional[CrawlParams] = None, From a655d24e7cdcc64e11578f3afa78c4732331b5fe Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 18 Apr 2025 00:29:20 -0700 Subject: [PATCH 14/26] scrape params commentary --- apps/python-sdk/firecrawl/firecrawl.py | 32 ++++++++++++-------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d56f951c..ea99aaa7 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -461,23 +461,21 @@ class FirecrawlApp: Args: url (str): Target URL to scrape - params (Optional[ScrapeParams]): See ScrapeParams model for configuration: - Content Options: - * formats - Content types to retrieve (markdown/html/etc) - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type (basic/stealth) - - Extraction Options: - * extract - Content extraction settings - * jsonOptions - JSON extraction settings - * actions - Actions to perform + formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc) + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait for a specific element to appear + timeout (Optional[int]): Request timeout (ms) + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 images + block_ads (Optional[bool]): Block ads + proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth) + extract (Optional[ExtractConfig]): Content extraction settings + json_options (Optional[ExtractConfig]): JSON extraction settings + actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform Returns: ScrapeResponse with: From 5e6e41ab175207fd2fd0a24b6f8c5406b4826fb5 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 00:37:34 -0700 Subject: [PATCH 15/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 868 +++++++++++++++++-------- 1 file changed, 611 insertions(+), 257 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d56f951c..1eb5f8e7 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -441,6 +441,7 @@ class FirecrawlApp: def scrape_url( self, url: str, + *, formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, include_tags: Optional[List[str]] = None, exclude_tags: Optional[List[str]] = None, @@ -455,7 +456,8 @@ class FirecrawlApp: proxy: Optional[Literal["basic", "stealth"]] = None, 
extract: Optional[ExtractConfig] = None, json_options: Optional[ExtractConfig] = None, - actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]: + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, + **kwargs) -> ScrapeResponse[Any]: """ Scrape and extract content from a URL. @@ -479,6 +481,7 @@ class FirecrawlApp: * jsonOptions - JSON extraction settings * actions - Actions to perform + Returns: ScrapeResponse with: * Requested content formats @@ -532,6 +535,7 @@ class FirecrawlApp: scrape_params['jsonOptions'] = json_options.dict(exclude_none=True) if actions: scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + scrape_params.update(kwargs) # Make request response = requests.post( @@ -558,6 +562,7 @@ class FirecrawlApp: def search( self, query: str, + *, limit: Optional[int] = None, tbs: Optional[str] = None, filter: Optional[str] = None, @@ -649,97 +654,150 @@ class FirecrawlApp: else: self._handle_error(response, 'search') - def crawl_url(self, url: str, - params: Optional[CrawlParams] = None, - poll_interval: Optional[int] = 2, - idempotency_key: Optional[str] = None) -> CrawlStatusResponse: + def crawl_url( + self, + url: str, + *, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, + max_depth: Optional[int] = None, + max_discovery_depth: Optional[int] = None, + limit: Optional[int] = None, + allow_backward_links: Optional[bool] = None, + allow_external_links: Optional[bool] = None, + ignore_sitemap: Optional[bool] = None, + scrape_options: Optional[CommonOptions] = None, + webhook: Optional[Union[str, WebhookConfig]] = None, + deduplicate_similar_urls: Optional[bool] = None, + ignore_query_parameters: Optional[bool] = None, + regex_on_full_url: Optional[bool] = None, + poll_interval: Optional[int] = 2, + idempotency_key: Optional[str] = None, + **kwargs + ) -> CrawlStatusResponse: """ Crawl a website starting from a URL. 
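        Illustrative usage sketch for the keyword-only crawl_url signature introduced above (assumes a configured FirecrawlApp instance `app`; the URL and path pattern are placeholders, not values from this codebase):

            status = app.crawl_url(
                "https://example.com",
                include_paths=["/blog/.*"],   # only follow blog URLs
                limit=10,                     # crawl at most 10 pages
                poll_interval=2,              # seconds between status checks
            )
            # crawl_url blocks until the job finishes and returns a CrawlStatusResponse
            print(status.status, f"{status.completed}/{status.total} pages")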
Args: - url (str): Target URL to start crawling from - params (Optional[CrawlParams]): See CrawlParams model: - URL Discovery: - * includePaths - Patterns of URLs to include - * excludePaths - Patterns of URLs to exclude - * maxDepth - Maximum crawl depth - * maxDiscoveryDepth - Maximum depth for finding new URLs - * limit - Maximum pages to crawl - - Link Following: - * allowBackwardLinks - Follow parent directory links - * allowExternalLinks - Follow external domain links - * ignoreSitemap - Skip sitemap.xml processing - - Advanced: - * scrapeOptions - Page scraping configuration - * webhook - Notification webhook settings - * deduplicateSimilarURLs - Remove similar URLs - * ignoreQueryParameters - Ignore URL parameters - * regexOnFullURL - Apply regex to full URLs - poll_interval (int): Seconds between status checks (default: 2) - idempotency_key (Optional[str]): Unique key to prevent duplicate requests + url (str): Target URL to start crawling from + include_paths (Optional[List[str]]): Patterns of URLs to include + exclude_paths (Optional[List[str]]): Patterns of URLs to exclude + max_depth (Optional[int]): Maximum crawl depth + max_discovery_depth (Optional[int]): Maximum depth for finding new URLs + limit (Optional[int]): Maximum pages to crawl + allow_backward_links (Optional[bool]): Follow parent directory links + allow_external_links (Optional[bool]): Follow external domain links + ignore_sitemap (Optional[bool]): Skip sitemap.xml processing + scrape_options (Optional[CommonOptions]): Page scraping configuration + webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings + deduplicate_similar_urls (Optional[bool]): Remove similar URLs + ignore_query_parameters (Optional[bool]): Ignore URL parameters + regex_on_full_url (Optional[bool]): Apply regex to full URLs + poll_interval (Optional[int]): Seconds between status checks (default: 2) + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - CrawlStatusResponse with: - * Crawling status and progress - * Crawled page contents - * Success/error information + CrawlStatusResponse with: + * Crawling status and progress + * Crawled page contents + * Success/error information Raises: - Exception: If crawl fails + Exception: If crawl fails """ - endpoint = f'/v1/crawl' + crawl_params = {} + + # Add individual parameters + if include_paths is not None: + crawl_params['includePaths'] = include_paths + if exclude_paths is not None: + crawl_params['excludePaths'] = exclude_paths + if max_depth is not None: + crawl_params['maxDepth'] = max_depth + if max_discovery_depth is not None: + crawl_params['maxDiscoveryDepth'] = max_discovery_depth + if limit is not None: + crawl_params['limit'] = limit + if allow_backward_links is not None: + crawl_params['allowBackwardLinks'] = allow_backward_links + if allow_external_links is not None: + crawl_params['allowExternalLinks'] = allow_external_links + if ignore_sitemap is not None: + crawl_params['ignoreSitemap'] = ignore_sitemap + if scrape_options is not None: + crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True) + if webhook is not None: + crawl_params['webhook'] = webhook + if deduplicate_similar_urls is not None: + crawl_params['deduplicateSimilarURLs'] = deduplicate_similar_urls + if ignore_query_parameters is not None: + crawl_params['ignoreQueryParameters'] = ignore_query_parameters + if regex_on_full_url is not None: + crawl_params['regexOnFullURL'] = regex_on_full_url + + # 
Add any additional kwargs + crawl_params.update(kwargs) + + # Create final params object + final_params = CrawlParams(**crawl_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['url'] = url + params_dict['origin'] = f"python-sdk@{version}" + + # Make request headers = self._prepare_headers(idempotency_key) - json_data = {'url': url} - if params: - json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" - response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) + response = self._post_request(f'{self.api_url}/v1/crawl', params_dict, headers) + if response.status_code == 200: try: id = response.json().get('id') except: raise Exception(f'Failed to parse Firecrawl response as JSON.') return self._monitor_job_status(id, headers, poll_interval) - else: self._handle_error(response, 'start crawl job') - def async_crawl_url( - self, - url: str, - params: Optional[CrawlParams] = None, - idempotency_key: Optional[str] = None) -> CrawlResponse: + self, + url: str, + *, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, + max_depth: Optional[int] = None, + max_discovery_depth: Optional[int] = None, + limit: Optional[int] = None, + allow_backward_links: Optional[bool] = None, + allow_external_links: Optional[bool] = None, + ignore_sitemap: Optional[bool] = None, + scrape_options: Optional[CommonOptions] = None, + webhook: Optional[Union[str, WebhookConfig]] = None, + deduplicate_similar_urls: Optional[bool] = None, + ignore_query_parameters: Optional[bool] = None, + regex_on_full_url: Optional[bool] = None, + idempotency_key: Optional[str] = None, + **kwargs + ) -> CrawlResponse: """ Start an asynchronous crawl job. Args: url (str): Target URL to start crawling from - - params (Optional[CrawlParams]): See CrawlParams model: - - URL Discovery: - * includePaths - Patterns of URLs to include - * excludePaths - Patterns of URLs to exclude - * maxDepth - Maximum crawl depth - * maxDiscoveryDepth - Maximum depth for finding new URLs - * limit - Maximum pages to crawl - - Link Following: - * allowBackwardLinks - Follow parent directory links - * allowExternalLinks - Follow external domain links - * ignoreSitemap - Skip sitemap.xml processing - - Advanced: - * scrapeOptions - Page scraping configuration - * webhook - Notification webhook settings - * deduplicateSimilarURLs - Remove similar URLs - * ignoreQueryParameters - Ignore URL parameters - * regexOnFullURL - Apply regex to full URLs - - idempotency_key: Unique key to prevent duplicate requests + include_paths (Optional[List[str]]): Patterns of URLs to include + exclude_paths (Optional[List[str]]): Patterns of URLs to exclude + max_depth (Optional[int]): Maximum crawl depth + max_discovery_depth (Optional[int]): Maximum depth for finding new URLs + limit (Optional[int]): Maximum pages to crawl + allow_backward_links (Optional[bool]): Follow parent directory links + allow_external_links (Optional[bool]): Follow external domain links + ignore_sitemap (Optional[bool]): Skip sitemap.xml processing + scrape_options (Optional[CommonOptions]): Page scraping configuration + webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings + deduplicate_similar_urls (Optional[bool]): Remove similar URLs + ignore_query_parameters (Optional[bool]): Ignore URL parameters + regex_on_full_url (Optional[bool]): Apply regex to full URLs + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the 
API Returns: CrawlResponse with: @@ -751,16 +809,52 @@ class FirecrawlApp: Raises: Exception: If crawl initiation fails """ - endpoint = f'/v1/crawl' + crawl_params = {} + + # Add individual parameters + if include_paths is not None: + crawl_params['includePaths'] = include_paths + if exclude_paths is not None: + crawl_params['excludePaths'] = exclude_paths + if max_depth is not None: + crawl_params['maxDepth'] = max_depth + if max_discovery_depth is not None: + crawl_params['maxDiscoveryDepth'] = max_discovery_depth + if limit is not None: + crawl_params['limit'] = limit + if allow_backward_links is not None: + crawl_params['allowBackwardLinks'] = allow_backward_links + if allow_external_links is not None: + crawl_params['allowExternalLinks'] = allow_external_links + if ignore_sitemap is not None: + crawl_params['ignoreSitemap'] = ignore_sitemap + if scrape_options is not None: + crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True) + if webhook is not None: + crawl_params['webhook'] = webhook + if deduplicate_similar_urls is not None: + crawl_params['deduplicateSimilarURLs'] = deduplicate_similar_urls + if ignore_query_parameters is not None: + crawl_params['ignoreQueryParameters'] = ignore_query_parameters + if regex_on_full_url is not None: + crawl_params['regexOnFullURL'] = regex_on_full_url + + # Add any additional kwargs + crawl_params.update(kwargs) + + # Create final params object + final_params = CrawlParams(**crawl_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['url'] = url + params_dict['origin'] = f"python-sdk@{version}" + + # Make request headers = self._prepare_headers(idempotency_key) - json_data = {'url': url} - if params: - json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" - response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) + response = self._post_request(f'{self.api_url}/v1/crawl', params_dict, headers) + if response.status_code == 200: try: - return response.json() + return CrawlResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: @@ -842,10 +936,10 @@ class FirecrawlApp: if 'next' in status_data: response['next'] = status_data['next'] - return { - 'success': False if 'error' in status_data else True, + return CrawlStatusResponse( + success=False if 'error' in status_data else True, **response - } + ) else: self._handle_error(response, 'check crawl status') @@ -872,7 +966,7 @@ class FirecrawlApp: response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers) if response.status_code == 200: try: - return response.json() + return CrawlErrorsResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: @@ -906,254 +1000,519 @@ class FirecrawlApp: def crawl_url_and_watch( self, url: str, - params: Optional[CrawlParams] = None, - idempotency_key: Optional[str] = None) -> 'CrawlWatcher': + *, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, + max_depth: Optional[int] = None, + max_discovery_depth: Optional[int] = None, + limit: Optional[int] = None, + allow_backward_links: Optional[bool] = None, + allow_external_links: Optional[bool] = None, + ignore_sitemap: Optional[bool] = None, + scrape_options: Optional[CommonOptions] = None, + webhook: Optional[Union[str, WebhookConfig]] = None, + deduplicate_similar_urls: Optional[bool] = None, + ignore_query_parameters: Optional[bool] = None, + regex_on_full_url: Optional[bool] = None, + 
idempotency_key: Optional[str] = None, + **kwargs + ) -> 'CrawlWatcher': """ Initiate a crawl job and return a CrawlWatcher to monitor the job via WebSocket. Args: - url (str): Target URL to start crawling from - params (Optional[CrawlParams]): See CrawlParams model for configuration: - URL Discovery: - * includePaths - Patterns of URLs to include - * excludePaths - Patterns of URLs to exclude - * maxDepth - Maximum crawl depth - * maxDiscoveryDepth - Maximum depth for finding new URLs - * limit - Maximum pages to crawl - - Link Following: - * allowBackwardLinks - Follow parent directory links - * allowExternalLinks - Follow external domain links - * ignoreSitemap - Skip sitemap.xml processing - - Advanced: - * scrapeOptions - Page scraping configuration - * webhook - Notification webhook settings - * deduplicateSimilarURLs - Remove similar URLs - * ignoreQueryParameters - Ignore URL parameters - * regexOnFullURL - Apply regex to full URLs - idempotency_key (Optional[str]): Unique key to prevent duplicate requests + url (str): Target URL to start crawling from + include_paths (Optional[List[str]]): Patterns of URLs to include + exclude_paths (Optional[List[str]]): Patterns of URLs to exclude + max_depth (Optional[int]): Maximum crawl depth + max_discovery_depth (Optional[int]): Maximum depth for finding new URLs + limit (Optional[int]): Maximum pages to crawl + allow_backward_links (Optional[bool]): Follow parent directory links + allow_external_links (Optional[bool]): Follow external domain links + ignore_sitemap (Optional[bool]): Skip sitemap.xml processing + scrape_options (Optional[CommonOptions]): Page scraping configuration + webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings + deduplicate_similar_urls (Optional[bool]): Remove similar URLs + ignore_query_parameters (Optional[bool]): Ignore URL parameters + regex_on_full_url (Optional[bool]): Apply regex to full URLs + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - AsyncCrawlWatcher: An instance to monitor the crawl job via WebSocket + CrawlWatcher: An instance to monitor the crawl job via WebSocket Raises: - Exception: If crawl job fails to start + Exception: If crawl job fails to start """ - crawl_response = self.async_crawl_url(url, params, idempotency_key) - if crawl_response['success'] and 'id' in crawl_response: - return CrawlWatcher(crawl_response['id'], self) + crawl_response = self.async_crawl_url( + url, + include_paths=include_paths, + exclude_paths=exclude_paths, + max_depth=max_depth, + max_discovery_depth=max_discovery_depth, + limit=limit, + allow_backward_links=allow_backward_links, + allow_external_links=allow_external_links, + ignore_sitemap=ignore_sitemap, + scrape_options=scrape_options, + webhook=webhook, + deduplicate_similar_urls=deduplicate_similar_urls, + ignore_query_parameters=ignore_query_parameters, + regex_on_full_url=regex_on_full_url, + idempotency_key=idempotency_key, + **kwargs + ) + if crawl_response.success and crawl_response.id: + return CrawlWatcher(crawl_response.id, self) else: raise Exception("Crawl job failed to start") def map_url( self, url: str, + *, + search: Optional[str] = None, + ignore_sitemap: Optional[bool] = None, + include_subdomains: Optional[bool] = None, + sitemap_only: Optional[bool] = None, + limit: Optional[int] = None, + timeout: Optional[int] = None, params: Optional[MapParams] = None) -> MapResponse: """ Map and discover links from a URL. 
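        Illustrative usage sketch for the keyword-only map_url signature introduced above (assumes a configured FirecrawlApp instance `app`; the URL and search term are placeholders):

            res = app.map_url(
                "https://example.com",
                search="docs",    # filter discovered URLs
                limit=100,        # cap the number of returned links
            )
            # map_url returns a MapResponse; links holds the discovered URLs
            for link in res.links or []:
                print(link)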
Args: - url: Target URL to map - - params: See MapParams model: - - Discovery Options: - * search - Filter pattern for URLs - * ignoreSitemap - Skip sitemap.xml - * includeSubdomains - Include subdomain links - * sitemapOnly - Only use sitemap.xml - - Limits: - * limit - Max URLs to return - * timeout - Request timeout (ms) + url (str): Target URL to map + search (Optional[str]): Filter pattern for URLs + ignore_sitemap (Optional[bool]): Skip sitemap.xml processing + include_subdomains (Optional[bool]): Include subdomain links + sitemap_only (Optional[bool]): Only use sitemap.xml + limit (Optional[int]): Maximum URLs to return + timeout (Optional[int]): Request timeout in milliseconds + params (Optional[MapParams]): Additional mapping parameters Returns: - MapResponse with: - * Discovered URLs - * Success/error status + MapResponse: Response containing: + * success (bool): Whether request succeeded + * links (List[str]): Discovered URLs + * error (Optional[str]): Error message if any Raises: - Exception: If mapping fails + Exception: If mapping fails or response cannot be parsed """ - endpoint = f'/v1/map' - headers = self._prepare_headers() - - # Prepare the base scrape parameters with the URL - json_data = {'url': url} + # Build map parameters + map_params = {} if params: - json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" + map_params.update(params.dict(exclude_none=True)) - # Make the POST request with the prepared headers and JSON data + # Add individual parameters + if search is not None: + map_params['search'] = search + if ignore_sitemap is not None: + map_params['ignoreSitemap'] = ignore_sitemap + if include_subdomains is not None: + map_params['includeSubdomains'] = include_subdomains + if sitemap_only is not None: + map_params['sitemapOnly'] = sitemap_only + if limit is not None: + map_params['limit'] = limit + if timeout is not None: + map_params['timeout'] = timeout + + # Create final params object + final_params = MapParams(**map_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['url'] = url + params_dict['origin'] = f"python-sdk@{version}" + + # Make request response = requests.post( - f'{self.api_url}{endpoint}', - headers=headers, - json=json_data, + f"{self.api_url}/v1/map", + headers={"Authorization": f"Bearer {self.api_key}"}, + json=params_dict ) + if response.status_code == 200: try: - response = response.json() - except: - raise Exception(f'Failed to parse Firecrawl response as JSON.') - if response['success'] and 'links' in response: - return response - elif 'error' in response: - raise Exception(f'Failed to map URL. Error: {response["error"]}') - else: - raise Exception(f'Failed to map URL. Error: {response}') + response_json = response.json() + if response_json.get('success') and 'links' in response_json: + return MapResponse(**response_json) + elif "error" in response_json: + raise Exception(f'Map failed. Error: {response_json["error"]}') + else: + raise Exception(f'Map failed. 
Error: {response_json}') + except ValueError: + raise Exception('Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, 'map') - def batch_scrape_urls(self, urls: List[str], - params: Optional[ScrapeParams] = None, - poll_interval: Optional[int] = 2, - idempotency_key: Optional[str] = None) -> BatchScrapeStatusResponse: + def batch_scrape_urls( + self, + urls: List[str], + *, + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + headers: Optional[Dict[str, str]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] = None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, + agent: Optional[AgentOptions] = None, + poll_interval: Optional[int] = 2, + idempotency_key: Optional[str] = None, + **kwargs + ) -> BatchScrapeStatusResponse: """ Batch scrape multiple URLs and monitor until completion. Args: urls (List[str]): URLs to scrape - params (Optional[ScrapeParams]): See ScrapeParams model: - Content Options: - * formats - Content formats to retrieve - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type - - Extraction Options: - * extract - Content extraction config - * jsonOptions - JSON extraction config - * actions - Actions to perform + formats (Optional[List[Literal]]): Content formats to retrieve + headers (Optional[Dict[str, str]]): Custom HTTP headers + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait time in milliseconds + timeout (Optional[int]): Request timeout in milliseconds + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 encoded images + block_ads (Optional[bool]): Block advertisements + proxy (Optional[Literal]): Proxy type to use + extract (Optional[ExtractConfig]): Content extraction config + json_options (Optional[ExtractConfig]): JSON extraction config + actions (Optional[List[Union]]): Actions to perform + agent (Optional[AgentOptions]): Agent configuration + poll_interval (Optional[int]): Seconds between status checks (default: 2) + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - BatchScrapeStatusResponse with: - * Scraping status and progress - * Scraped content for each URL - * Success/error information + BatchScrapeStatusResponse with: + * Scraping status and progress + * Scraped content for each URL + * 
Success/error information Raises: - Exception: If batch scrape fails + Exception: If batch scrape fails """ - endpoint = f'/v1/batch/scrape' + scrape_params = {} + + # Add individual parameters + if formats is not None: + scrape_params['formats'] = formats + if headers is not None: + scrape_params['headers'] = headers + if include_tags is not None: + scrape_params['includeTags'] = include_tags + if exclude_tags is not None: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for is not None: + scrape_params['waitFor'] = wait_for + if timeout is not None: + scrape_params['timeout'] = timeout + if location is not None: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy is not None: + scrape_params['proxy'] = proxy + if extract is not None: + if hasattr(extract.schema, 'schema'): + extract.schema = extract.schema.schema() + scrape_params['extract'] = extract.dict(exclude_none=True) + if json_options is not None: + if hasattr(json_options.schema, 'schema'): + json_options.schema = json_options.schema.schema() + scrape_params['jsonOptions'] = json_options.dict(exclude_none=True) + if actions is not None: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + if agent is not None: + scrape_params['agent'] = agent.dict(exclude_none=True) + + # Add any additional kwargs + scrape_params.update(kwargs) + + # Create final params object + final_params = ScrapeParams(**scrape_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['urls'] = urls + params_dict['origin'] = f"python-sdk@{version}" + + # Make request headers = self._prepare_headers(idempotency_key) - json_data = {'urls': urls} - if params: - json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" - response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) + response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers) + if response.status_code == 200: try: id = response.json().get('id') except: raise Exception(f'Failed to parse Firecrawl response as JSON.') return self._monitor_job_status(id, headers, poll_interval) - else: self._handle_error(response, 'start batch scrape job') - def async_batch_scrape_urls( - self, - urls: List[str], - params: Optional[ScrapeParams] = None, - idempotency_key: Optional[str] = None) -> BatchScrapeResponse: + self, + urls: List[str], + *, + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + headers: Optional[Dict[str, str]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] = None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: 
Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, + agent: Optional[AgentOptions] = None, + idempotency_key: Optional[str] = None, + **kwargs + ) -> BatchScrapeResponse: """ Initiate a batch scrape job asynchronously. Args: - urls (List[str]): List of URLs to scrape - params (Optional[ScrapeParams]): See ScrapeParams model for configuration: - Content Options: - * formats - Content formats to retrieve - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type - - Extraction Options: - * extract - Content extraction config - * jsonOptions - JSON extraction config - * actions - Actions to perform - idempotency_key (Optional[str]): Unique key to prevent duplicate requests + urls (List[str]): URLs to scrape + formats (Optional[List[Literal]]): Content formats to retrieve + headers (Optional[Dict[str, str]]): Custom HTTP headers + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait time in milliseconds + timeout (Optional[int]): Request timeout in milliseconds + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 encoded images + block_ads (Optional[bool]): Block advertisements + proxy (Optional[Literal]): Proxy type to use + extract (Optional[ExtractConfig]): Content extraction config + json_options (Optional[ExtractConfig]): JSON extraction config + actions (Optional[List[Union]]): Actions to perform + agent (Optional[AgentOptions]): Agent configuration + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - BatchScrapeResponse with: - * success - Whether job started successfully - * id - Unique identifier for the job - * url - Status check URL - * error - Error message if start failed + BatchScrapeResponse with: + * success - Whether job started successfully + * id - Unique identifier for the job + * url - Status check URL + * error - Error message if start failed Raises: - Exception: If job initiation fails + Exception: If job initiation fails """ - endpoint = f'/v1/batch/scrape' + scrape_params = {} + + # Add individual parameters + if formats is not None: + scrape_params['formats'] = formats + if headers is not None: + scrape_params['headers'] = headers + if include_tags is not None: + scrape_params['includeTags'] = include_tags + if exclude_tags is not None: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for is not None: + scrape_params['waitFor'] = wait_for + if timeout is not None: + scrape_params['timeout'] = timeout + if location is not None: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + 
scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy is not None: + scrape_params['proxy'] = proxy + if extract is not None: + if hasattr(extract.schema, 'schema'): + extract.schema = extract.schema.schema() + scrape_params['extract'] = extract.dict(exclude_none=True) + if json_options is not None: + if hasattr(json_options.schema, 'schema'): + json_options.schema = json_options.schema.schema() + scrape_params['jsonOptions'] = json_options.dict(exclude_none=True) + if actions is not None: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + if agent is not None: + scrape_params['agent'] = agent.dict(exclude_none=True) + + # Add any additional kwargs + scrape_params.update(kwargs) + + # Create final params object + final_params = ScrapeParams(**scrape_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['urls'] = urls + params_dict['origin'] = f"python-sdk@{version}" + + # Make request headers = self._prepare_headers(idempotency_key) - json_data = {'urls': urls} - if params: - json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" - response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) + response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers) + if response.status_code == 200: try: - return response.json() + return BatchScrapeResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: self._handle_error(response, 'start batch scrape job') def batch_scrape_urls_and_watch( - self, - urls: List[str], - params: Optional[ScrapeParams] = None, - idempotency_key: Optional[str] = None) -> 'CrawlWatcher': + self, + urls: List[str], + *, + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + headers: Optional[Dict[str, str]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] = None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, + agent: Optional[AgentOptions] = None, + idempotency_key: Optional[str] = None, + **kwargs + ) -> 'CrawlWatcher': """ Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket. 
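        Illustrative usage sketch for the keyword-only watcher signature introduced above (assumes a configured FirecrawlApp instance `app`; the URLs are placeholders, and consuming events depends on the CrawlWatcher interface, which is not shown in this hunk):

            watcher = app.batch_scrape_urls_and_watch(
                ["https://example.com/a", "https://example.com/b"],
                formats=["markdown"],
                only_main_content=True,
            )
            # watcher monitors the batch scrape job over WebSocket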
Args: - urls (List[str]): List of URLs to scrape - params (Optional[ScrapeParams]): See ScrapeParams model for configuration: - - Content Options: - * formats - Content formats to retrieve - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type - - Extraction Options: - * extract - Content extraction config - * jsonOptions - JSON extraction config - * actions - Actions to perform + urls (List[str]): URLs to scrape + formats (Optional[List[Literal]]): Content formats to retrieve + headers (Optional[Dict[str, str]]): Custom HTTP headers + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait time in milliseconds + timeout (Optional[int]): Request timeout in milliseconds + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 encoded images + block_ads (Optional[bool]): Block advertisements + proxy (Optional[Literal]): Proxy type to use + extract (Optional[ExtractConfig]): Content extraction config + json_options (Optional[ExtractConfig]): JSON extraction config + actions (Optional[List[Union]]): Actions to perform + agent (Optional[AgentOptions]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - AsyncCrawlWatcher: An instance to monitor the batch scrape job via WebSocket + CrawlWatcher: An instance to monitor the batch scrape job via WebSocket Raises: Exception: If batch scrape job fails to start """ - crawl_response = self.async_batch_scrape_urls(urls, params, idempotency_key) - if crawl_response['success'] and 'id' in crawl_response: - return CrawlWatcher(crawl_response['id'], self) + scrape_params = {} + + # Add individual parameters + if formats is not None: + scrape_params['formats'] = formats + if headers is not None: + scrape_params['headers'] = headers + if include_tags is not None: + scrape_params['includeTags'] = include_tags + if exclude_tags is not None: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for is not None: + scrape_params['waitFor'] = wait_for + if timeout is not None: + scrape_params['timeout'] = timeout + if location is not None: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy is not None: + scrape_params['proxy'] = proxy + if extract is not None: + if hasattr(extract.schema, 'schema'): + extract.schema = extract.schema.schema() + scrape_params['extract'] = extract.dict(exclude_none=True) + if json_options is not None: + if hasattr(json_options.schema, 'schema'): + json_options.schema = json_options.schema.schema() + scrape_params['jsonOptions'] = 
json_options.dict(exclude_none=True) + if actions is not None: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + if agent is not None: + scrape_params['agent'] = agent.dict(exclude_none=True) + + # Add any additional kwargs + scrape_params.update(kwargs) + + # Create final params object + final_params = ScrapeParams(**scrape_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['urls'] = urls + params_dict['origin'] = f"python-sdk@{version}" + + # Make request + headers = self._prepare_headers(idempotency_key) + response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers) + + if response.status_code == 200: + try: + crawl_response = BatchScrapeResponse(**response.json()) + if crawl_response.success and crawl_response.id: + return CrawlWatcher(crawl_response.id, self) + else: + raise Exception("Batch scrape job failed to start") + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') else: - raise Exception("Batch scrape job failed to start") + self._handle_error(response, 'start batch scrape job') def check_batch_scrape_status(self, id: str) -> BatchScrapeStatusResponse: """ @@ -1203,25 +1562,17 @@ class FirecrawlApp: break status_data['data'] = data - response = { + return BatchScrapeStatusResponse(**{ + 'success': False if 'error' in status_data else True, 'status': status_data.get('status'), 'total': status_data.get('total'), 'completed': status_data.get('completed'), 'creditsUsed': status_data.get('creditsUsed'), 'expiresAt': status_data.get('expiresAt'), - 'data': status_data.get('data') - } - - if 'error' in status_data: - response['error'] = status_data['error'] - - if 'next' in status_data: - response['next'] = status_data['next'] - - return { - 'success': False if 'error' in status_data else True, - **response - } + 'data': status_data.get('data'), + 'next': status_data.get('next'), + 'error': status_data.get('error') + }) else: self._handle_error(response, 'check batch scrape status') @@ -1230,7 +1581,7 @@ class FirecrawlApp: Returns information about batch scrape errors. Args: - id (str): The ID of the crawl job. + id (str): The ID of the crawl job. 
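        Illustrative sketch for inspecting a batch scrape job (the job id is a placeholder; the error-checking method name is assumed from the /v1/batch/scrape/{id}/errors endpoint used below):

            status = app.check_batch_scrape_status(job_id)   # BatchScrapeStatusResponse
            print(status.status, f"{status.completed}/{status.total}")
            errors = app.check_batch_scrape_errors(job_id)   # assumed method name; returns CrawlErrorsResponse
            for err in errors.errors:
                print(err["url"], err["error"])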
Returns: CrawlErrorsResponse: A response containing: @@ -1240,12 +1591,15 @@ class FirecrawlApp: * url (str): URL that caused the error * error (str): Error message * robotsBlocked (List[str]): List of URLs blocked by robots.txt + + Raises: + Exception: If the error check request fails """ headers = self._prepare_headers() response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers) if response.status_code == 200: try: - return response.json() + return CrawlErrorsResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: From d8792d2301cb8d5fff2228a01294d69b2b32035c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 00:48:07 -0700 Subject: [PATCH 16/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 359 +++++++++++++------------ 1 file changed, 190 insertions(+), 169 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 1eb5f8e7..82ff9606 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1608,47 +1608,45 @@ class FirecrawlApp: def extract( self, urls: Optional[List[str]] = None, - params: Optional[ExtractParams] = None) -> ExtractResponse[Any]: + *, + prompt: Optional[str] = None, + schema_: Optional[Any] = None, + system_prompt: Optional[str] = None, + allow_external_links: Optional[bool] = False, + enable_web_search: Optional[bool] = False, + show_sources: Optional[bool] = False, + agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]: """ Extract structured information from URLs. Args: - urls: URLs to extract from - - params: See ExtractParams model: - - Extraction Config: - * prompt - Custom extraction prompt - * schema - JSON schema/Pydantic model - * systemPrompt - System context - - Behavior Options: - * allowExternalLinks - Follow external links - * enableWebSearch - Enable web search - * includeSubdomains - Include subdomains - * showSources - Include source URLs - - Scraping Options: - * scrapeOptions - Page scraping config + urls (Optional[List[str]]): URLs to extract from + prompt (Optional[str]): Custom extraction prompt + schema_ (Optional[Any]): JSON schema/Pydantic model + system_prompt (Optional[str]): System context + allow_external_links (Optional[bool]): Follow external links + enable_web_search (Optional[bool]): Enable web search + show_sources (Optional[bool]): Include source URLs + agent (Optional[Dict[str, Any]]): Agent configuration Returns: - ExtractResponse with: - * Structured data matching schema - * Source information if requested - * Success/error status + ExtractResponse[Any] with: + * success (bool): Whether request succeeded + * data (Optional[Any]): Extracted data matching schema + * error (Optional[str]): Error message if any Raises: ValueError: If prompt/schema missing or extraction fails """ headers = self._prepare_headers() - if not params or (not params.get('prompt') and not params.get('schema')): + if not prompt and not schema_: raise ValueError("Either prompt or schema is required") - if not urls and not params.get('prompt'): + if not urls and not prompt: raise ValueError("Either urls or prompt is required") - schema = params.get('schema') + schema = schema_ if schema: if hasattr(schema, 'model_json_schema'): # Convert Pydantic model to JSON schema @@ -1656,26 +1654,22 @@ class FirecrawlApp: # Otherwise assume it's already a JSON schema dict request_data = { - 'urls': urls, - 'allowExternalLinks': params.get('allow_external_links', 
params.get('allowExternalLinks', False)), - 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), - 'showSources': params.get('show_sources', params.get('showSources', False)), + 'urls': urls or [], + 'allowExternalLinks': allow_external_links, + 'enableWebSearch': enable_web_search, + 'showSources': show_sources, 'schema': schema, 'origin': f'python-sdk@{get_version()}' } - if not request_data['urls']: - request_data['urls'] = [] # Only add prompt and systemPrompt if they exist - if params.get('prompt'): - request_data['prompt'] = params['prompt'] - if params.get('system_prompt'): - request_data['systemPrompt'] = params['system_prompt'] - elif params.get('systemPrompt'): # Check legacy field name - request_data['systemPrompt'] = params['systemPrompt'] + if prompt: + request_data['prompt'] = prompt + if system_prompt: + request_data['systemPrompt'] = system_prompt - if params.get('agent'): - request_data['agent'] = params['agent'] + if agent: + request_data['agent'] = agent try: # Send the initial extract request @@ -1706,7 +1700,7 @@ class FirecrawlApp: except: raise Exception(f'Failed to parse Firecrawl response as JSON.') if status_data['status'] == 'completed': - return status_data + return ExtractResponse(**status_data) elif status_data['status'] in ['failed', 'cancelled']: raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}') else: @@ -1720,7 +1714,7 @@ class FirecrawlApp: except Exception as e: raise ValueError(str(e), 500) - return {'success': False, 'error': "Internal server error."} + return ExtractResponse(success=False, error="Internal server error.") def get_extract_status(self, job_id: str) -> ExtractResponse[Any]: """ @@ -1740,7 +1734,7 @@ class FirecrawlApp: response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers) if response.status_code == 200: try: - return response.json() + return ExtractResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: @@ -1751,60 +1745,68 @@ class FirecrawlApp: def async_extract( self, urls: List[str], - params: Optional[ExtractParams] = None, + *, + prompt: Optional[str] = None, + schema_: Optional[Any] = None, + system_prompt: Optional[str] = None, + allow_external_links: Optional[bool] = False, + enable_web_search: Optional[bool] = False, + show_sources: Optional[bool] = False, + agent: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: """ Initiate an asynchronous extract job. 
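        Illustrative usage sketch for the keyword-only extract API refactored above (assumes a configured FirecrawlApp instance `app`; the schema model, prompt and URL are placeholders); async_extract below accepts the same keywords plus idempotency_key:

            from pydantic import BaseModel

            class Product(BaseModel):
                name: str
                price: str

            result = app.extract(
                urls=["https://example.com/product"],
                prompt="Extract the product name and price",
                schema_=Product,   # Pydantic models are converted via model_json_schema()
            )
            if result.success:
                print(result.data)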
Args: urls (List[str]): URLs to extract information from - params (Optional[ExtractParams]): See ExtractParams model: - Extraction Config: - * prompt - Custom extraction prompt - * schema - JSON schema/Pydantic model - * systemPrompt - System context - - Behavior Options: - * allowExternalLinks - Follow external links - * enableWebSearch - Enable web search - * includeSubdomains - Include subdomains - * showSources - Include source URLs - - Scraping Options: - * scrapeOptions - Page scraping config + prompt (Optional[str]): Custom extraction prompt + schema_ (Optional[Any]): JSON schema/Pydantic model + system_prompt (Optional[str]): System context + allow_external_links (Optional[bool]): Follow external links + enable_web_search (Optional[bool]): Enable web search + show_sources (Optional[bool]): Include source URLs + agent (Optional[Dict[str, Any]]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - ExtractResponse containing: - * success (bool): Whether job started successfully - * id (str): Unique identifier for the job - * error (str, optional): Error message if start failed + ExtractResponse[Any] with: + * success (bool): Whether request succeeded + * data (Optional[Any]): Extracted data matching schema + * error (Optional[str]): Error message if any Raises: - ValueError: If job initiation fails + ValueError: If job initiation fails """ headers = self._prepare_headers(idempotency_key) - schema = params.get('schema') if params else None + schema = schema_ if schema: if hasattr(schema, 'model_json_schema'): # Convert Pydantic model to JSON schema schema = schema.model_json_schema() # Otherwise assume it's already a JSON schema dict - jsonData = {'urls': urls, **(params or {})} request_data = { - **jsonData, - 'allowExternalLinks': params.get('allow_external_links', False) if params else False, + 'urls': urls, + 'allowExternalLinks': allow_external_links, + 'enableWebSearch': enable_web_search, + 'showSources': show_sources, 'schema': schema, 'origin': f'python-sdk@{version}' } + if prompt: + request_data['prompt'] = prompt + if system_prompt: + request_data['systemPrompt'] = system_prompt + if agent: + request_data['agent'] = agent + try: response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers) if response.status_code == 200: try: - return response.json() + return ExtractResponse(**response.json()) except: raise Exception(f'Failed to parse Firecrawl response as JSON.') else: @@ -1815,41 +1817,36 @@ class FirecrawlApp: def generate_llms_text( self, url: str, - params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: + *, + max_urls: Optional[int] = None, + show_full_text: Optional[bool] = None, + experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse: """ Generate LLMs.txt for a given URL and poll until completion. 
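        Illustrative usage sketch for the keyword-only generate_llms_text signature introduced above (assumes a configured FirecrawlApp instance `app`; the URL is a placeholder):

            result = app.generate_llms_text(
                "https://example.com",
                max_urls=5,            # process at most 5 URLs
                show_full_text=True,   # also request the llmsfulltxt variant
            )
            # the returned status payload carries the generated llmstxt content
            # (and llmsfulltxt when show_full_text is set), as described below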
Args: - url: Target URL to generate LLMs.txt from - - params: See GenerateLLMsTextParams model: - params: See GenerateLLMsTextParams model: - - params: See GenerateLLMsTextParams model: - - Generation Options: - * maxUrls - Maximum URLs to process (default: 10) - * showFullText - Include full text in output (default: False) + url (str): Target URL to generate LLMs.txt from + max_urls (Optional[int]): Maximum URLs to process (default: 10) + show_full_text (Optional[bool]): Include full text in output (default: False) + experimental_stream (Optional[bool]): Enable experimental streaming Returns: - GenerateLLMsTextStatusResponse with: - * Generated LLMs.txt content - * Full version if requested - * Generation status - * Success/error information + GenerateLLMsTextStatusResponse with: + * Generated LLMs.txt content + * Full version if requested + * Generation status + * Success/error information Raises: - Exception: If generation fails + Exception: If generation fails """ - if params is None: - params = {} + params = GenerateLLMsTextParams( + maxUrls=max_urls, + showFullText=show_full_text, + __experimental_stream=experimental_stream + ) - if isinstance(params, dict): - generation_params = GenerateLLMsTextParams(**params) - else: - generation_params = params - - response = self.async_generate_llms_text(url, generation_params) + response = self.async_generate_llms_text(url, params) if not response.get('success') or 'id' not in response: return response @@ -1871,35 +1868,36 @@ class FirecrawlApp: def async_generate_llms_text( self, url: str, - params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse: + *, + max_urls: Optional[int] = None, + show_full_text: Optional[bool] = None, + experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse: """ Initiate an asynchronous LLMs.txt generation operation. Args: - url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL. - params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Generation configuration parameters: - * maxUrls (int, optional): Maximum number of URLs to process (default: 10) - * showFullText (bool, optional): Include full text in output (default: False) + url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL. + max_urls (Optional[int]): Maximum URLs to process (default: 10) + show_full_text (Optional[bool]): Include full text in output (default: False) + experimental_stream (Optional[bool]): Enable experimental streaming Returns: - GenerateLLMsTextResponse: A response containing: - - success (bool): Whether the generation initiation was successful - - id (str): The unique identifier for the generation job - - error (str, optional): Error message if initiation failed + GenerateLLMsTextResponse: A response containing: + * success (bool): Whether the generation initiation was successful + * id (str): The unique identifier for the generation job + * error (str, optional): Error message if initiation failed Raises: - Exception: If the generation job initiation fails. + Exception: If the generation job initiation fails. 
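        Illustrative sketch of starting a generation job without polling (the URL is a placeholder; dict-style access mirrors how generate_llms_text above consumes this response):

            job = app.async_generate_llms_text("https://example.com", max_urls=5)
            if job.get('success'):
                print("LLMs.txt generation job id:", job['id'])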
""" - if params is None: - params = {} - - if isinstance(params, dict): - generation_params = GenerateLLMsTextParams(**params) - else: - generation_params = params + params = GenerateLLMsTextParams( + maxUrls=max_urls, + showFullText=show_full_text, + __experimental_stream=experimental_stream + ) headers = self._prepare_headers() - json_data = {'url': url, **generation_params.dict(exclude_none=True)} + json_data = {'url': url, **params.dict(exclude_none=True)} json_data['origin'] = f"python-sdk@{version}" try: @@ -1921,20 +1919,20 @@ class FirecrawlApp: Check the status of a LLMs.txt generation operation. Args: - id (str): The unique identifier of the LLMs.txt generation job to check status for. + id (str): The unique identifier of the LLMs.txt generation job to check status for. Returns: - GenerateLLMsTextStatusResponse: A response containing: - * success (bool): Whether the generation was successful - * status (str): Status of generation ("processing", "completed", "failed") - * data (Dict[str, str], optional): Generated text with fields: - * llmstxt (str): Generated LLMs.txt content - * llmsfulltxt (str, optional): Full version if requested - * error (str, optional): Error message if generation failed - * expiresAt (str): When the generated data expires + GenerateLLMsTextStatusResponse: A response containing: + * success (bool): Whether the generation was successful + * status (str): Status of generation ("processing", "completed", "failed") + * data (Dict[str, str], optional): Generated text with fields: + * llmstxt (str): Generated LLMs.txt content + * llmsfulltxt (str, optional): Full version if requested + * error (str, optional): Error message if generation failed + * expiresAt (str): When the generated data expires Raises: - Exception: If the status check fails. + Exception: If the status check fails. """ headers = self._prepare_headers() try: @@ -2172,52 +2170,57 @@ class FirecrawlApp: def deep_research( self, query: str, - params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None, + *, + max_depth: Optional[int] = None, + time_limit: Optional[int] = None, + max_urls: Optional[int] = None, + analysis_prompt: Optional[str] = None, + system_prompt: Optional[str] = None, + __experimental_stream_steps: Optional[bool] = None, on_activity: Optional[Callable[[Dict[str, Any]], None]] = None, on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse: """ Initiates a deep research operation on a given query and polls until completion. 
Args: - query: Research query or topic to investigate - - params: See DeepResearchParams model: - Research Settings: - * maxDepth - Maximum research depth (default: 7) - * timeLimit - Time limit in seconds (default: 270) - * maxUrls - Maximum URLs to process (default: 20) - - Callbacks: - * on_activity - Progress callback receiving: - {type, status, message, timestamp, depth} - * on_source - Source discovery callback receiving: - {url, title, description} + query (str): Research query or topic to investigate + max_depth (Optional[int]): Maximum depth of research exploration + time_limit (Optional[int]): Time limit in seconds for research + max_urls (Optional[int]): Maximum number of URLs to process + analysis_prompt (Optional[str]): Custom prompt for analysis + system_prompt (Optional[str]): Custom system prompt + __experimental_stream_steps (Optional[bool]): Enable experimental streaming + on_activity (Optional[Callable]): Progress callback receiving {type, status, message, timestamp, depth} + on_source (Optional[Callable]): Source discovery callback receiving {url, title, description} Returns: - DeepResearchResponse containing: - - Status: - * success - Whether research completed successfully - * status - Current state (processing/completed/failed) - * error - Error message if failed - - Results: - * id - Unique identifier for the research job - * data - Research findings and analysis - * sources - List of discovered sources - * activities - Research progress log - * summaries - Generated research summaries + DeepResearchStatusResponse containing: + * success (bool): Whether research completed successfully + * status (str): Current state (processing/completed/failed) + * error (Optional[str]): Error message if failed + * id (str): Unique identifier for the research job + * data (Any): Research findings and analysis + * sources (List[Dict]): List of discovered sources + * activities (List[Dict]): Research progress log + * summaries (List[str]): Generated research summaries Raises: - Exception: If research fails + Exception: If research fails """ - if params is None: - params = {} - - if isinstance(params, dict): - research_params = DeepResearchParams(**params) - else: - research_params = params + research_params = {} + if max_depth is not None: + research_params['maxDepth'] = max_depth + if time_limit is not None: + research_params['timeLimit'] = time_limit + if max_urls is not None: + research_params['maxUrls'] = max_urls + if analysis_prompt is not None: + research_params['analysisPrompt'] = analysis_prompt + if system_prompt is not None: + research_params['systemPrompt'] = system_prompt + if __experimental_stream_steps is not None: + research_params['__experimental_streamSteps'] = __experimental_stream_steps + research_params = DeepResearchParams(**research_params) response = self.async_deep_research(query, research_params) if not response.get('success') or 'id' not in response: @@ -2253,19 +2256,30 @@ class FirecrawlApp: return {'success': False, 'error': 'Deep research job terminated unexpectedly'} - def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> Dict[str, Any]: + def async_deep_research( + self, + query: str, + *, + max_depth: Optional[int] = None, + time_limit: Optional[int] = None, + max_urls: Optional[int] = None, + analysis_prompt: Optional[str] = None, + system_prompt: Optional[str] = None, + __experimental_stream_steps: Optional[bool] = None) -> Dict[str, Any]: """ Initiates an asynchronous deep research 
operation. Args: - query (str): The research query to investigate. Should be a clear, specific question or topic. - params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Research configuration parameters: - * maxDepth (int, optional): Maximum depth of research exploration (default: 7) - * timeLimit (int, optional): Time limit in seconds for research (default: 270) - * maxUrls (int, optional): Maximum number of URLs to process (default: 20) + query (str): Research query or topic to investigate + max_depth (Optional[int]): Maximum depth of research exploration + time_limit (Optional[int]): Time limit in seconds for research + max_urls (Optional[int]): Maximum number of URLs to process + analysis_prompt (Optional[str]): Custom prompt for analysis + system_prompt (Optional[str]): Custom system prompt + __experimental_stream_steps (Optional[bool]): Enable experimental streaming Returns: - DeepResearchResponse: A response containing: + Dict[str, Any]: A response containing: * success (bool): Whether the research initiation was successful * id (str): The unique identifier for the research job * error (str, optional): Error message if initiation failed @@ -2273,13 +2287,20 @@ class FirecrawlApp: Raises: Exception: If the research initiation fails. """ - if params is None: - params = {} - - if isinstance(params, dict): - research_params = DeepResearchParams(**params) - else: - research_params = params + research_params = {} + if max_depth is not None: + research_params['maxDepth'] = max_depth + if time_limit is not None: + research_params['timeLimit'] = time_limit + if max_urls is not None: + research_params['maxUrls'] = max_urls + if analysis_prompt is not None: + research_params['analysisPrompt'] = analysis_prompt + if system_prompt is not None: + research_params['systemPrompt'] = system_prompt + if __experimental_stream_steps is not None: + research_params['__experimental_streamSteps'] = __experimental_stream_steps + research_params = DeepResearchParams(**research_params) headers = self._prepare_headers() From 390f3d44a344b822b8fc31bc8a01f1cbead0c1dd Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 00:51:06 -0700 Subject: [PATCH 17/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 82ff9606..f0cc707c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -402,7 +402,7 @@ class ExtractParams(pydantic.BaseModel): Parameters for the extract operation. 
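Before the extract-related changes below, a usage sketch for the keyword-based deep_research call refactored above (illustrative only; it assumes the callback payloads are plain dicts with the fields listed in the docstring):

import os
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])

def on_activity(activity):
    # Progress events: {type, status, message, timestamp, depth}
    print(f"[{activity.get('type')}] {activity.get('message')}")

def on_source(source):
    # Discovered sources: {url, title, description}
    print(f"source: {source.get('url')}")

results = app.deep_research(
    "What are the trade-offs between SQLite and PostgreSQL?",
    max_depth=3,
    time_limit=120,
    max_urls=10,
    on_activity=on_activity,
    on_source=on_source,
)
print(results)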
""" prompt: Optional[str] = None - schema_: Optional[Any] = pydantic.Field(None, alias='schema') + schema: Optional[Any] = pydantic.Field(None, alias='schema') system_prompt: Optional[str] = None allow_external_links: Optional[bool] = False enable_web_search: Optional[bool] = False @@ -1610,7 +1610,7 @@ class FirecrawlApp: urls: Optional[List[str]] = None, *, prompt: Optional[str] = None, - schema_: Optional[Any] = None, + schema: Optional[Any] = None, system_prompt: Optional[str] = None, allow_external_links: Optional[bool] = False, enable_web_search: Optional[bool] = False, @@ -1622,7 +1622,7 @@ class FirecrawlApp: Args: urls (Optional[List[str]]): URLs to extract from prompt (Optional[str]): Custom extraction prompt - schema_ (Optional[Any]): JSON schema/Pydantic model + schema (Optional[Any]): JSON schema/Pydantic model system_prompt (Optional[str]): System context allow_external_links (Optional[bool]): Follow external links enable_web_search (Optional[bool]): Enable web search @@ -1640,13 +1640,12 @@ class FirecrawlApp: """ headers = self._prepare_headers() - if not prompt and not schema_: + if not prompt and not schema: raise ValueError("Either prompt or schema is required") if not urls and not prompt: raise ValueError("Either urls or prompt is required") - schema = schema_ if schema: if hasattr(schema, 'model_json_schema'): # Convert Pydantic model to JSON schema @@ -1747,7 +1746,7 @@ class FirecrawlApp: urls: List[str], *, prompt: Optional[str] = None, - schema_: Optional[Any] = None, + schema: Optional[Any] = None, system_prompt: Optional[str] = None, allow_external_links: Optional[bool] = False, enable_web_search: Optional[bool] = False, @@ -1760,7 +1759,7 @@ class FirecrawlApp: Args: urls (List[str]): URLs to extract information from prompt (Optional[str]): Custom extraction prompt - schema_ (Optional[Any]): JSON schema/Pydantic model + schema (Optional[Any]): JSON schema/Pydantic model system_prompt (Optional[str]): System context allow_external_links (Optional[bool]): Follow external links enable_web_search (Optional[bool]): Enable web search @@ -1779,7 +1778,7 @@ class FirecrawlApp: """ headers = self._prepare_headers(idempotency_key) - schema = schema_ + schema = schema if schema: if hasattr(schema, 'model_json_schema'): # Convert Pydantic model to JSON schema From 1aa0c092e05bde9d867a6a5fe54175797fa37c2f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 01:01:01 -0700 Subject: [PATCH 18/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 70 +++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index f0cc707c..117ca093 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -49,40 +49,40 @@ logger : logging.Logger = logging.getLogger("firecrawl") T = TypeVar('T') -class FirecrawlDocumentMetadata(pydantic.BaseModel): - """Metadata for a Firecrawl document.""" - title: Optional[str] = None - description: Optional[str] = None - language: Optional[str] = None - keywords: Optional[str] = None - robots: Optional[str] = None - ogTitle: Optional[str] = None - ogDescription: Optional[str] = None - ogUrl: Optional[str] = None - ogImage: Optional[str] = None - ogAudio: Optional[str] = None - ogDeterminer: Optional[str] = None - ogLocale: Optional[str] = None - ogLocaleAlternate: Optional[List[str]] = None - ogSiteName: Optional[str] = None - ogVideo: Optional[str] = None - dctermsCreated: 
Optional[str] = None - dcDateCreated: Optional[str] = None - dcDate: Optional[str] = None - dctermsType: Optional[str] = None - dcType: Optional[str] = None - dctermsAudience: Optional[str] = None - dctermsSubject: Optional[str] = None - dcSubject: Optional[str] = None - dcDescription: Optional[str] = None - dctermsKeywords: Optional[str] = None - modifiedTime: Optional[str] = None - publishedTime: Optional[str] = None - articleTag: Optional[str] = None - articleSection: Optional[str] = None - sourceURL: Optional[str] = None - statusCode: Optional[int] = None - error: Optional[str] = None +# class FirecrawlDocumentMetadata(pydantic.BaseModel): +# """Metadata for a Firecrawl document.""" +# title: Optional[str] = None +# description: Optional[str] = None +# language: Optional[str] = None +# keywords: Optional[str] = None +# robots: Optional[str] = None +# ogTitle: Optional[str] = None +# ogDescription: Optional[str] = None +# ogUrl: Optional[str] = None +# ogImage: Optional[str] = None +# ogAudio: Optional[str] = None +# ogDeterminer: Optional[str] = None +# ogLocale: Optional[str] = None +# ogLocaleAlternate: Optional[List[str]] = None +# ogSiteName: Optional[str] = None +# ogVideo: Optional[str] = None +# dctermsCreated: Optional[str] = None +# dcDateCreated: Optional[str] = None +# dcDate: Optional[str] = None +# dctermsType: Optional[str] = None +# dcType: Optional[str] = None +# dctermsAudience: Optional[str] = None +# dctermsSubject: Optional[str] = None +# dcSubject: Optional[str] = None +# dcDescription: Optional[str] = None +# dctermsKeywords: Optional[str] = None +# modifiedTime: Optional[str] = None +# publishedTime: Optional[str] = None +# articleTag: Optional[str] = None +# articleSection: Optional[str] = None +# sourceURL: Optional[str] = None +# statusCode: Optional[int] = None +# error: Optional[str] = None class AgentOptions(pydantic.BaseModel): """Configuration for the agent.""" @@ -107,7 +107,7 @@ class FirecrawlDocument(pydantic.BaseModel, Generic[T]): extract: Optional[T] = None json: Optional[T] = None screenshot: Optional[str] = None - metadata: Optional[FirecrawlDocumentMetadata] = None + metadata: Optional[Any] = None actions: Optional[ActionsResult] = None title: Optional[str] = None # v1 search only description: Optional[str] = None # v1 search only From 8cd82b5600620a13dba590722271c2bd1a2d1475 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 18 Apr 2025 01:06:58 -0700 Subject: [PATCH 19/26] async scrape --- apps/python-sdk/firecrawl/firecrawl.py | 115 +++++++++++++++++-------- 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index ea99aaa7..f66b25d5 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -2308,29 +2308,41 @@ class AsyncFirecrawlApp(FirecrawlApp): async def scrape_url( self, url: str, - params: Optional[ScrapeParams] = None) -> ScrapeResponse[Any]: + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] 
= None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]: """ - Asynchronously scrape and extract content from a URL. + Scrape and extract content from a URL asynchronously. Args: - url (str): Target URL to scrape - params (Optional[ScrapeParams]): See ScrapeParams model for configuration: - Content Options: - * formats - Content types to retrieve (markdown/html/etc) - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type (basic/stealth) - - Extraction Options: - * extract - Content extraction settings - * jsonOptions - JSON extraction settings - * actions - Actions to perform + url (str): Target URL to scrape + formats (Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]]): Content types to retrieve (markdown/html/etc) + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait for a specific element to appear + timeout (Optional[int]): Request timeout (ms) + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 images + block_ads (Optional[bool]): Block ads + proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth) + extract (Optional[ExtractConfig]): Content extraction settings + json_options (Optional[ExtractConfig]): JSON extraction settings + actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform Returns: ScrapeResponse with: @@ -2340,35 +2352,70 @@ class AsyncFirecrawlApp(FirecrawlApp): * Success/error status Raises: - Exception: If scraping fails + Exception: If scraping fails """ headers = self._prepare_headers() - scrape_params = {'url': url, 'origin': f'python-sdk@{version}'} - if params: - extract = params.get('extract', {}) - if extract: - if 'schema' in extract and hasattr(extract['schema'], 'schema'): - extract['schema'] = extract['schema'].schema() - scrape_params['extract'] = extract + # Build scrape parameters + scrape_params = { + 'url': url, + 'origin': f"python-sdk@{version}" + } - for key, value in params.items(): - if key not in ['extract']: - scrape_params[key] = value + # Add optional parameters if provided and not None + if formats: + scrape_params['formats'] = formats + if include_tags: + scrape_params['includeTags'] = include_tags + if exclude_tags: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for: + scrape_params['waitFor'] = wait_for + if timeout: + scrape_params['timeout'] = timeout + if location: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + 
scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy: + scrape_params['proxy'] = proxy + if extract: + extract_dict = extract.dict(exclude_none=True) + if 'schema' in extract_dict and hasattr(extract.schema, 'schema'): + extract_dict['schema'] = extract.schema.schema() # Ensure pydantic model schema is converted + scrape_params['extract'] = extract_dict + if json_options: + json_options_dict = json_options.dict(exclude_none=True) + if 'schema' in json_options_dict and hasattr(json_options.schema, 'schema'): + json_options_dict['schema'] = json_options.schema.schema() # Ensure pydantic model schema is converted + scrape_params['jsonOptions'] = json_options_dict + if actions: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + # Make async request endpoint = f'/v1/scrape' response = await self._async_post_request( f'{self.api_url}{endpoint}', scrape_params, headers ) - + if response.get('success') and 'data' in response: - return response['data'] + return ScrapeResponse(**response['data']) elif "error" in response: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') else: - raise Exception(f'Failed to scrape URL. Error: {response}') + # Use the response content directly if possible, otherwise a generic message + error_content = response.get('error', str(response)) + raise Exception(f'Failed to scrape URL. Error: {error_content}') async def batch_scrape_urls( self, From 0b62be58745d8775daf2c2ffa1806f356315db54 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 01:12:24 -0700 Subject: [PATCH 20/26] Update firecrawl.py --- apps/python-sdk/firecrawl/firecrawl.py | 400 +++++++++++++++---------- 1 file changed, 234 insertions(+), 166 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 117ca093..5a3a80d0 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -3356,12 +3356,12 @@ class AsyncFirecrawlApp(FirecrawlApp): job_id (str): The ID of the extraction job Returns: - ExtractResponse containing: - * success (bool): Whether extraction completed successfully - * data (Any): Extracted structured data - * error (str, optional): Error message if extraction failed - * warning (str, optional): Warning message if any - * sources (List[str], optional): Source URLs if requested + ExtractResponse[Any] with: + * success (bool): Whether request succeeded + * data (Optional[Any]): Extracted data matching schema + * error (Optional[str]): Error message if any + * warning (Optional[str]): Warning message if any + * sources (Optional[List[str]]): Source URLs if requested Raises: ValueError: If status check fails @@ -3377,54 +3377,67 @@ class AsyncFirecrawlApp(FirecrawlApp): async def async_extract( self, - urls: List[str], - params: Optional[ExtractParams] = None, + urls: Optional[List[str]] = None, + *, + prompt: Optional[str] = None, + schema: Optional[Any] = None, + system_prompt: Optional[str] = None, + allow_external_links: Optional[bool] = False, + enable_web_search: Optional[bool] = False, + show_sources: Optional[bool] = False, + agent: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> ExtractResponse[Any]: """ Initiate an asynchronous extraction job without 
waiting for completion. Args: - urls (List[str]): URLs to extract information from - params (Optional[ExtractParams]): See ExtractParams model: - Extraction Config: - * prompt - Custom extraction prompt - * schema - JSON schema/Pydantic model - * systemPrompt - System context - - Behavior Options: - * allowExternalLinks - Follow external links - * enableWebSearch - Enable web search - * includeSubdomains - Include subdomains - * showSources - Include source URLs - - Scraping Options: - * scrapeOptions - Page scraping config + urls (Optional[List[str]]): URLs to extract from + prompt (Optional[str]): Custom extraction prompt + schema (Optional[Any]): JSON schema/Pydantic model + system_prompt (Optional[str]): System context + allow_external_links (Optional[bool]): Follow external links + enable_web_search (Optional[bool]): Enable web search + show_sources (Optional[bool]): Include source URLs + agent (Optional[Dict[str, Any]]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests Returns: - ExtractResponse containing: - * success (bool): Whether job started successfully - * id (str): Unique identifier for the job - * error (str, optional): Error message if start failed + ExtractResponse[Any] with: + * success (bool): Whether request succeeded + * data (Optional[Any]): Extracted data matching schema + * error (Optional[str]): Error message if any Raises: - ValueError: If job initiation fails + ValueError: If job initiation fails """ headers = self._prepare_headers(idempotency_key) - - schema = params.get('schema') if params else None + + if not prompt and not schema: + raise ValueError("Either prompt or schema is required") + + if not urls and not prompt: + raise ValueError("Either urls or prompt is required") + if schema: if hasattr(schema, 'model_json_schema'): schema = schema.model_json_schema() - jsonData = {'urls': urls, **(params or {})} request_data = { - **jsonData, - 'allowExternalLinks': params.get('allow_external_links', False) if params else False, + 'urls': urls or [], + 'allowExternalLinks': allow_external_links, + 'enableWebSearch': enable_web_search, + 'showSources': show_sources, 'schema': schema, 'origin': f'python-sdk@{version}' } + if prompt: + request_data['prompt'] = prompt + if system_prompt: + request_data['systemPrompt'] = system_prompt + if agent: + request_data['agent'] = agent + try: return await self._async_post_request( f'{self.api_url}/v1/extract', @@ -3437,16 +3450,18 @@ class AsyncFirecrawlApp(FirecrawlApp): async def generate_llms_text( self, url: str, - params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse: + *, + max_urls: Optional[int] = None, + show_full_text: Optional[bool] = None, + experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse: """ Generate LLMs.txt for a given URL and monitor until completion. 
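The asynchronous client follows the same fire-and-forget pattern for extraction. A sketch of that flow (illustrative; the get_extract_status name and the dict-style response access are assumptions based on this version of the SDK):

import asyncio
import os
from pydantic import BaseModel
from firecrawl.firecrawl import AsyncFirecrawlApp

class PageSummary(BaseModel):
    title: str
    summary: str

async def main():
    app = AsyncFirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])

    # Start an extraction job, then check on it separately.
    job = await app.async_extract(
        ["https://docs.firecrawl.dev"],
        prompt="Summarise what this page is about",
        schema=PageSummary.model_json_schema(),
    )
    if job.get('success') and 'id' in job:
        status = await app.get_extract_status(job['id'])
        print(status)

asyncio.run(main())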
        Args:
            url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-              Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming

        Returns:
            GenerateLLMsTextStatusResponse containing:
            * success (bool): Whether generation completed successfully
            * status (str): Status of generation (processing/completed/failed)
            * data (Dict[str, str], optional): Generated text with fields:
                * llmstxt (str): Generated LLMs.txt content
                * llmsfulltxt (str, optional): Full version if requested
            * error (str, optional): Error message if generation failed
            * expiresAt (str): When the generated data expires

        Raises:
            Exception: If generation fails
        """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
-
-        response = await self.async_generate_llms_text(url, generation_params)
+        response = await self.async_generate_llms_text(
+            url,
+            max_urls=max_urls,
+            show_full_text=show_full_text,
+            experimental_stream=experimental_stream
+        )

        if not response.get('success') or 'id' not in response:
            return response
@@ -3491,36 +3506,38 @@ class AsyncFirecrawlApp(FirecrawlApp):
    async def async_generate_llms_text(
        self,
        url: str,
-        params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse:
+        *,
+        max_urls: Optional[int] = None,
+        show_full_text: Optional[bool] = None,
+        experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
        """
        Initiate an asynchronous LLMs.txt generation job without waiting for completion.

        Args:
-            url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-              Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            url (str): Target URL to generate LLMs.txt from
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming

        Returns:
-            GenerateLLMsTextResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            GenerateLLMsTextResponse containing:
+            * success (bool): Whether job started successfully
+            * id (str): Unique identifier for the job
+            * error (str, optional): Error message if start failed

        Raises:
-            ValueError: If job initiation fails
+            ValueError: If job initiation fails
        """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
+        params = {}
+        if max_urls is not None:
+            params['maxUrls'] = max_urls
+        if show_full_text is not None:
+            params['showFullText'] = show_full_text
+        if experimental_stream is not None:
+            params['__experimental_stream'] = experimental_stream

        headers = self._prepare_headers()
-        json_data = {'url': url, **generation_params.dict(exclude_none=True)}
+        json_data = {'url': url, **params}
        json_data['origin'] = f"python-sdk@{version}"

        try:
@@ -3564,52 +3581,57 @@ class AsyncFirecrawlApp(FirecrawlApp):
    async def deep_research(
        self,
        query: str,
-        params: Optional[Union[Dict[str, Any],
DeepResearchParams]] = None, + *, + max_depth: Optional[int] = None, + time_limit: Optional[int] = None, + max_urls: Optional[int] = None, + analysis_prompt: Optional[str] = None, + system_prompt: Optional[str] = None, + __experimental_stream_steps: Optional[bool] = None, on_activity: Optional[Callable[[Dict[str, Any]], None]] = None, on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse: """ - Initiates a deep research operation on a given query and polls until completion, providing real-time updates via callbacks. + Initiates a deep research operation on a given query and polls until completion. Args: - query: Research query or topic to investigate - - params: See DeepResearchParams model: - Research Settings: - * maxDepth - Maximum research depth (default: 7) - * timeLimit - Time limit in seconds (default: 270) - * maxUrls - Maximum URLs to process (default: 20) - - Callbacks: - * on_activity - Progress callback receiving: - {type, status, message, timestamp, depth} - * on_source - Source discovery callback receiving: - {url, title, description} + query (str): Research query or topic to investigate + max_depth (Optional[int]): Maximum depth of research exploration + time_limit (Optional[int]): Time limit in seconds for research + max_urls (Optional[int]): Maximum number of URLs to process + analysis_prompt (Optional[str]): Custom prompt for analysis + system_prompt (Optional[str]): Custom system prompt + __experimental_stream_steps (Optional[bool]): Enable experimental streaming + on_activity (Optional[Callable]): Progress callback receiving {type, status, message, timestamp, depth} + on_source (Optional[Callable]): Source discovery callback receiving {url, title, description} Returns: - DeepResearchResponse containing: - - Status: - * success - Whether research completed successfully - * status - Current state (processing/completed/failed) - * error - Error message if failed - - Results: - * id - Unique identifier for the research job - * data - Research findings and analysis - * sources - List of discovered sources - * activities - Research progress log - * summaries - Generated research summaries + DeepResearchStatusResponse containing: + * success (bool): Whether research completed successfully + * status (str): Current state (processing/completed/failed) + * error (Optional[str]): Error message if failed + * id (str): Unique identifier for the research job + * data (Any): Research findings and analysis + * sources (List[Dict]): List of discovered sources + * activities (List[Dict]): Research progress log + * summaries (List[str]): Generated research summaries Raises: - Exception: If research fails + Exception: If research fails """ - if params is None: - params = {} - - if isinstance(params, dict): - research_params = DeepResearchParams(**params) - else: - research_params = params + research_params = {} + if max_depth is not None: + research_params['maxDepth'] = max_depth + if time_limit is not None: + research_params['timeLimit'] = time_limit + if max_urls is not None: + research_params['maxUrls'] = max_urls + if analysis_prompt is not None: + research_params['analysisPrompt'] = analysis_prompt + if system_prompt is not None: + research_params['systemPrompt'] = system_prompt + if __experimental_stream_steps is not None: + research_params['__experimental_streamSteps'] = __experimental_stream_steps + research_params = DeepResearchParams(**research_params) response = await self.async_deep_research(query, research_params) if not 
response.get('success') or 'id' not in response: @@ -3648,38 +3670,54 @@ class AsyncFirecrawlApp(FirecrawlApp): async def async_deep_research( self, query: str, - params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse: + *, + max_depth: Optional[int] = None, + time_limit: Optional[int] = None, + max_urls: Optional[int] = None, + analysis_prompt: Optional[str] = None, + system_prompt: Optional[str] = None, + __experimental_stream_steps: Optional[bool] = None) -> Dict[str, Any]: """ - Initiate an asynchronous deep research job without waiting for completion. + Initiates an asynchronous deep research operation. Args: query (str): Research query or topic to investigate - params (Optional[Union[Dict[str, Any], DeepResearchParams]]): See DeepResearchParams model: - Research Settings: - * maxDepth - Maximum research depth (default: 7) - * timeLimit - Time limit in seconds (default: 270) - * maxUrls - Maximum URLs to process (default: 20) + max_depth (Optional[int]): Maximum depth of research exploration + time_limit (Optional[int]): Time limit in seconds for research + max_urls (Optional[int]): Maximum number of URLs to process + analysis_prompt (Optional[str]): Custom prompt for analysis + system_prompt (Optional[str]): Custom system prompt + __experimental_stream_steps (Optional[bool]): Enable experimental streaming Returns: - DeepResearchResponse containing: - * success (bool): Whether job started successfully - * id (str): Unique identifier for the job - * error (str, optional): Error message if start failed + Dict[str, Any]: A response containing: + * success (bool): Whether the research initiation was successful + * id (str): The unique identifier for the research job + * error (str, optional): Error message if initiation failed Raises: - ValueError: If job initiation fails + Exception: If the research initiation fails. """ - if params is None: - params = {} - - if isinstance(params, dict): - research_params = DeepResearchParams(**params) - else: - research_params = params + research_params = {} + if max_depth is not None: + research_params['maxDepth'] = max_depth + if time_limit is not None: + research_params['timeLimit'] = time_limit + if max_urls is not None: + research_params['maxUrls'] = max_urls + if analysis_prompt is not None: + research_params['analysisPrompt'] = analysis_prompt + if system_prompt is not None: + research_params['systemPrompt'] = system_prompt + if __experimental_stream_steps is not None: + research_params['__experimental_streamSteps'] = __experimental_stream_steps + research_params = DeepResearchParams(**research_params) headers = self._prepare_headers() + json_data = {'query': query, **research_params.dict(exclude_none=True)} json_data['origin'] = f"python-sdk@{version}" + try: return await self._async_post_request( f'{self.api_url}/v1/deep-research', @@ -3691,26 +3729,28 @@ class AsyncFirecrawlApp(FirecrawlApp): async def check_deep_research_status(self, id: str) -> DeepResearchStatusResponse: """ - Check the status of an asynchronous deep research job. + Check the status of a deep research operation. Args: - id (str): The ID of the research job + id (str): The ID of the deep research operation. 
Returns: - DeepResearchStatusResponse containing: - * success (bool): Whether research completed successfully - * status (str): Current state (processing/completed/failed) - * data (Dict[str, Any], optional): Research findings and analysis - * error (str, optional): Error message if failed - * expiresAt (str): When the research data expires - * currentDepth (int): Current research depth - * maxDepth (int): Maximum research depth - * activities (List[Dict[str, Any]]): Research progress log - * sources (List[Dict[str, Any]]): Discovered sources - * summaries (List[str]): Generated research summaries + DeepResearchResponse containing: + + Status: + * success - Whether research completed successfully + * status - Current state (processing/completed/failed) + * error - Error message if failed + + Results: + * id - Unique identifier for the research job + * data - Research findings and analysis + * sources - List of discovered sources + * activities - Research progress log + * summaries - Generated research summaries Raises: - ValueError: If status check fails + Exception: If the status check fails. """ headers = self._prepare_headers() try: @@ -3724,52 +3764,80 @@ class AsyncFirecrawlApp(FirecrawlApp): async def search( self, query: str, - params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse: + *, + limit: Optional[int] = None, + tbs: Optional[str] = None, + filter: Optional[str] = None, + lang: Optional[str] = None, + country: Optional[str] = None, + location: Optional[str] = None, + timeout: Optional[int] = None, + scrape_options: Optional[CommonOptions] = None, + params: Optional[Union[Dict[str, Any], SearchParams]] = None, + **kwargs) -> SearchResponse: """ Asynchronously search for content using Firecrawl. Args: - query (str): Search query string - params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model: - Search Options: - * limit - Max results (default: 5) - * tbs - Time filter (e.g. "qdr:d") - * filter - Custom result filter - - Localization: - * lang - Language code (default: "en") - * country - Country code (default: "us") - * location - Geo-targeting - - Request Options: - * timeout - Request timeout (ms) - * scrapeOptions - Result scraping config + query (str): Search query string + limit (Optional[int]): Max results (default: 5) + tbs (Optional[str]): Time filter (e.g. 
"qdr:d") + filter (Optional[str]): Custom result filter + lang (Optional[str]): Language code (default: "en") + country (Optional[str]): Country code (default: "us") + location (Optional[str]): Geo-targeting + timeout (Optional[int]): Request timeout in milliseconds + scrape_options (Optional[CommonOptions]): Result scraping configuration + params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters + **kwargs: Additional keyword arguments for future compatibility Returns: - SearchResponse containing: - * success (bool): Whether search completed successfully - * data (List[FirecrawlDocument]): Search results - * warning (str, optional): Warning message if any - * error (str, optional): Error message if search failed + SearchResponse: Response containing: + * success (bool): Whether request succeeded + * data (List[FirecrawlDocument]): Search results + * warning (Optional[str]): Warning message if any + * error (Optional[str]): Error message if any Raises: - Exception: If search fails + Exception: If search fails or response cannot be parsed """ - if params is None: - params = {} + # Build search parameters + search_params = {} + if params: + if isinstance(params, dict): + search_params.update(params) + else: + search_params.update(params.dict(exclude_none=True)) - if isinstance(params, dict): - search_params = SearchParams(query=query, **params) - else: - search_params = params - search_params.query = query + # Add individual parameters + if limit is not None: + search_params['limit'] = limit + if tbs is not None: + search_params['tbs'] = tbs + if filter is not None: + search_params['filter'] = filter + if lang is not None: + search_params['lang'] = lang + if country is not None: + search_params['country'] = country + if location is not None: + search_params['location'] = location + if timeout is not None: + search_params['timeout'] = timeout + if scrape_options is not None: + search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True) + + # Add any additional kwargs + search_params.update(kwargs) - search_params_dict = search_params.dict(exclude_none=True) - search_params_dict['origin'] = f"python-sdk@{version}" + # Create final params object + final_params = SearchParams(query=query, **search_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['origin'] = f"python-sdk@{version}" return await self._async_post_request( f"{self.api_url}/v1/search", - search_params_dict, + params_dict, {"Authorization": f"Bearer {self.api_key}"} ) From f3522666db5d2339b5567c7a537dd064704c76c2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 01:13:53 -0700 Subject: [PATCH 21/26] Nick: new examples --- apps/python-sdk/example.py | 79 ++++++++++++++------------------------ 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index ae4258f7..705d2e0c 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -1,53 +1,45 @@ -import time -import nest_asyncio -import uuid -from firecrawl.firecrawl import FirecrawlApp +from firecrawl.firecrawl import ExtractConfig, FirecrawlApp from pydantic import BaseModel, Field from typing import List +import time +app = FirecrawlApp(api_url="https://api.firecrawl.dev") -app = FirecrawlApp(api_key="fc-") - -# Scrape a website: -scrape_result = app.scrape_url('firecrawl.dev') -print(scrape_result['markdown']) +# # Scrape a website: +scrape_result = app.scrape_url('example.com', formats=["markdown", "html"]) 
+print(scrape_result.markdown) -# Test batch scrape +# # Test batch scrapeq urls = ['https://example.com', 'https://docs.firecrawl.dev'] -batch_scrape_params = { - 'formats': ['markdown', 'html'], -} - # Synchronous batch scrape -batch_result = app.batch_scrape_urls(urls, batch_scrape_params) +batch_result = app.batch_scrape_urls(urls, formats=["markdown", "html"]) print("Synchronous Batch Scrape Result:") -print(batch_result['data'][0]['markdown']) +print(batch_result.data[0].markdown) -# Asynchronous batch scrape -async_batch_result = app.async_batch_scrape_urls(urls, batch_scrape_params) +# # Asynchronous batch scrape +async_batch_result = app.async_batch_scrape_urls(urls, formats=["markdown", "html"]) print("\nAsynchronous Batch Scrape Result:") print(async_batch_result) # Crawl a website: -idempotency_key = str(uuid.uuid4()) # optional idempotency key -crawl_result = app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key) -print(crawl_result) +crawl_result = app.crawl_url('firecrawl.dev', exclude_paths=['blog/*']) +print(crawl_result.data[0].markdown) -# Asynchronous Crawl a website: -async_result = app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "") +# # Asynchronous Crawl a website: +async_result = app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*']) print(async_result) -crawl_status = app.check_crawl_status(async_result['id']) +crawl_status = app.check_crawl_status(async_result.id) print(crawl_status) attempts = 15 -while attempts > 0 and crawl_status['status'] != 'completed': +while attempts > 0 and crawl_status.status != 'completed': print(crawl_status) - crawl_status = app.check_crawl_status(async_result['id']) + crawl_status = app.check_crawl_status(async_result.id) attempts -= 1 time.sleep(1) -crawl_status = app.check_crawl_status(async_result['id']) +crawl_status = app.check_crawl_status(async_result.id) print(crawl_status) # LLM Extraction: @@ -61,14 +53,11 @@ class ArticleSchema(BaseModel): class TopArticlesSchema(BaseModel): top: List[ArticleSchema] = Field(..., description="Top 5 stories") -llm_extraction_result = app.scrape_url('https://news.ycombinator.com', { - 'formats': ['extract'], - 'extract': { - 'schema': TopArticlesSchema.model_json_schema() - } -}) +extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema()) -print(llm_extraction_result['extract']) +llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config) + +print(llm_extraction_result.extract) # # Define schema to extract contents into using json schema json_schema = { @@ -94,24 +83,16 @@ json_schema = { "required": ["top"] } -app2 = FirecrawlApp(api_key="fc-", version="v0") +extract_config = ExtractConfig(extractionSchema=json_schema, mode="llm-extraction", pageOptions={"onlyMainContent": True}) +llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config) - -llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', { - 'extractorOptions': { - 'extractionSchema': json_schema, - 'mode': 'llm-extraction' - }, - 'pageOptions':{ - 'onlyMainContent': True - } -}) +print(llm_extraction_result.extract) # print(llm_extraction_result['llm_extraction']) # Map a website: -map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' }) +map_result = app.map_url('https://firecrawl.dev', search="blog") print(map_result) # Extract URLs: @@ -124,14 +105,12 @@ class ExtractSchema(BaseModel): extract_schema = 
ExtractSchema.schema() # Perform the extraction -extract_result = app.extract(['https://firecrawl.dev'], { - 'prompt': "Extract the title, description, and links from the website", - 'schema': extract_schema -}) +extract_result = app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema) print(extract_result) # Crawl a website with WebSockets: # inside an async function... +import nest_asyncio nest_asyncio.apply() # Define event handlers From a3f31682127d89a7c6260d75b1522407065227ab Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 18 Apr 2025 01:15:14 -0700 Subject: [PATCH 22/26] Nick: python sdk 2.0 --- apps/python-sdk/example_async.py | 48 +++++++++++---------------- apps/python-sdk/firecrawl/__init__.py | 2 +- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/apps/python-sdk/example_async.py b/apps/python-sdk/example_async.py index d5251515..c554d695 100644 --- a/apps/python-sdk/example_async.py +++ b/apps/python-sdk/example_async.py @@ -6,51 +6,47 @@ from firecrawl.firecrawl import AsyncFirecrawlApp from pydantic import BaseModel, Field from typing import List -app = AsyncFirecrawlApp(api_key="fc-") +app = AsyncFirecrawlApp(api_url="https://api.firecrawl.dev") async def example_scrape(): # Scrape a website: - scrape_result = await app.scrape_url('firecrawl.dev') - print(scrape_result['markdown']) + scrape_result = await app.scrape_url('example.com', formats=["markdown", "html"]) + print(scrape_result.markdown) async def example_batch_scrape(): # Batch scrape urls = ['https://example.com', 'https://docs.firecrawl.dev'] - batch_scrape_params = { - 'formats': ['markdown', 'html'], - } # Synchronous batch scrape - batch_result = await app.batch_scrape_urls(urls, batch_scrape_params) + batch_result = await app.batch_scrape_urls(urls, formats=["markdown", "html"]) print("Synchronous Batch Scrape Result:") - print(batch_result['data'][0]['markdown']) + print(batch_result.data[0].markdown) # Asynchronous batch scrape - async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params) + async_batch_result = await app.async_batch_scrape_urls(urls, formats=["markdown", "html"]) print("\nAsynchronous Batch Scrape Result:") print(async_batch_result) async def example_crawl(): # Crawl a website: - idempotency_key = str(uuid.uuid4()) # optional idempotency key - crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key) - print(crawl_result) + crawl_result = await app.crawl_url('firecrawl.dev', exclude_paths=['blog/*']) + print(crawl_result.data[0].markdown) # Asynchronous Crawl a website: - async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "") + async_result = await app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*']) print(async_result) - crawl_status = await app.check_crawl_status(async_result['id']) + crawl_status = await app.check_crawl_status(async_result.id) print(crawl_status) attempts = 15 - while attempts > 0 and crawl_status['status'] != 'completed': + while attempts > 0 and crawl_status.status != 'completed': print(crawl_status) - crawl_status = await app.check_crawl_status(async_result['id']) + crawl_status = await app.check_crawl_status(async_result.id) attempts -= 1 await asyncio.sleep(1) # Use async sleep instead of time.sleep - crawl_status = await app.check_crawl_status(async_result['id']) + crawl_status = await app.check_crawl_status(async_result.id) print(crawl_status) async def 
example_llm_extraction(): @@ -64,18 +60,15 @@ async def example_llm_extraction(): class TopArticlesSchema(BaseModel): top: List[ArticleSchema] = Field(..., description="Top 5 stories") - llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', { - 'formats': ['extract'], - 'extract': { - 'schema': TopArticlesSchema.model_json_schema() - } - }) + extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema()) - print(llm_extraction_result['extract']) + llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config) + + print(llm_extraction_result.extract) async def example_map_and_extract(): # Map a website: - map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' }) + map_result = await app.map_url('https://firecrawl.dev', search="blog") print(map_result) # Extract URLs: @@ -88,10 +81,7 @@ async def example_map_and_extract(): extract_schema = ExtractSchema.schema() # Perform the extraction - extract_result = await app.extract(['https://firecrawl.dev'], { - 'prompt': "Extract the title, description, and links from the website", - 'schema': extract_schema - }) + extract_result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema) print(extract_result) # Define event handlers for websocket diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index c30ba0fb..10431768 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp # noqa -__version__ = "1.17.0" +__version__ = "2.0.0" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") From 0915db515c70535d8956ddef421fd7433e85223e Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 18 Apr 2025 01:20:16 -0700 Subject: [PATCH 23/26] async functions --- apps/python-sdk/firecrawl/firecrawl.py | 597 +++++++++++++++++-------- 1 file changed, 422 insertions(+), 175 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 0f7964e1..d622a7ce 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -2792,225 +2792,472 @@ class AsyncFirecrawlApp(FirecrawlApp): raise Exception(f'Failed to scrape URL. 
Error: {error_content}') async def batch_scrape_urls( - self, - urls: List[str], - params: Optional[ScrapeParams] = None) -> BatchScrapeStatusResponse: + self, + urls: List[str], + *, + formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None, + headers: Optional[Dict[str, str]] = None, + include_tags: Optional[List[str]] = None, + exclude_tags: Optional[List[str]] = None, + only_main_content: Optional[bool] = None, + wait_for: Optional[int] = None, + timeout: Optional[int] = None, + location: Optional[LocationConfig] = None, + mobile: Optional[bool] = None, + skip_tls_verification: Optional[bool] = None, + remove_base64_images: Optional[bool] = None, + block_ads: Optional[bool] = None, + proxy: Optional[Literal["basic", "stealth"]] = None, + extract: Optional[ExtractConfig] = None, + json_options: Optional[ExtractConfig] = None, + actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, + agent: Optional[AgentOptions] = None, + poll_interval: Optional[int] = 2, + idempotency_key: Optional[str] = None, + **kwargs + ) -> BatchScrapeStatusResponse: """ Asynchronously scrape multiple URLs and monitor until completion. Args: urls (List[str]): URLs to scrape - params (Optional[ScrapeParams]): See ScrapeParams model: - Content Options: - * formats - Content formats to retrieve - * includeTags - HTML tags to include - * excludeTags - HTML tags to exclude - * onlyMainContent - Extract main content only - - Request Options: - * headers - Custom HTTP headers - * timeout - Request timeout (ms) - * mobile - Use mobile user agent - * proxy - Proxy type - - Extraction Options: - * extract - Content extraction config - * jsonOptions - JSON extraction config - * actions - Actions to perform + formats (Optional[List[Literal]]): Content formats to retrieve + headers (Optional[Dict[str, str]]): Custom HTTP headers + include_tags (Optional[List[str]]): HTML tags to include + exclude_tags (Optional[List[str]]): HTML tags to exclude + only_main_content (Optional[bool]): Extract main content only + wait_for (Optional[int]): Wait time in milliseconds + timeout (Optional[int]): Request timeout in milliseconds + location (Optional[LocationConfig]): Location configuration + mobile (Optional[bool]): Use mobile user agent + skip_tls_verification (Optional[bool]): Skip TLS verification + remove_base64_images (Optional[bool]): Remove base64 encoded images + block_ads (Optional[bool]): Block advertisements + proxy (Optional[Literal]): Proxy type to use + extract (Optional[ExtractConfig]): Content extraction config + json_options (Optional[ExtractConfig]): JSON extraction config + actions (Optional[List[Union]]): Actions to perform + agent (Optional[AgentOptions]): Agent configuration + poll_interval (Optional[int]): Seconds between status checks (default: 2) + idempotency_key (Optional[str]): Unique key to prevent duplicate requests + **kwargs: Additional parameters to pass to the API Returns: - BatchScrapeStatusResponse with: - * Scraping status and progress - * Scraped content for each URL - * Success/error information + BatchScrapeStatusResponse with: + * Scraping status and progress + * Scraped content for each URL + * Success/error information Raises: - Exception: If batch scrape fails + Exception: If batch scrape fails """ - headers = self._prepare_headers() - json_data = {'urls': urls} - if params: - 
json_data.update(params) - json_data['origin'] = f"python-sdk@{version}" + scrape_params = {} - endpoint = f'/v1/batch/scrape' + # Add individual parameters + if formats is not None: + scrape_params['formats'] = formats + if headers is not None: + scrape_params['headers'] = headers + if include_tags is not None: + scrape_params['includeTags'] = include_tags + if exclude_tags is not None: + scrape_params['excludeTags'] = exclude_tags + if only_main_content is not None: + scrape_params['onlyMainContent'] = only_main_content + if wait_for is not None: + scrape_params['waitFor'] = wait_for + if timeout is not None: + scrape_params['timeout'] = timeout + if location is not None: + scrape_params['location'] = location.dict(exclude_none=True) + if mobile is not None: + scrape_params['mobile'] = mobile + if skip_tls_verification is not None: + scrape_params['skipTlsVerification'] = skip_tls_verification + if remove_base64_images is not None: + scrape_params['removeBase64Images'] = remove_base64_images + if block_ads is not None: + scrape_params['blockAds'] = block_ads + if proxy is not None: + scrape_params['proxy'] = proxy + if extract is not None: + if hasattr(extract.schema, 'schema'): + extract.schema = extract.schema.schema() + scrape_params['extract'] = extract.dict(exclude_none=True) + if json_options is not None: + if hasattr(json_options.schema, 'schema'): + json_options.schema = json_options.schema.schema() + scrape_params['jsonOptions'] = json_options.dict(exclude_none=True) + if actions is not None: + scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions] + if agent is not None: + scrape_params['agent'] = agent.dict(exclude_none=True) + + # Add any additional kwargs + scrape_params.update(kwargs) + + # Create final params object + final_params = ScrapeParams(**scrape_params) + params_dict = final_params.dict(exclude_none=True) + params_dict['urls'] = urls + params_dict['origin'] = f"python-sdk@{version}" + + # Make request + headers = self._prepare_headers(idempotency_key) response = await self._async_post_request( - f'{self.api_url}{endpoint}', - json_data, + f'{self.api_url}/v1/batch/scrape', + params_dict, headers ) - if response.get('success') and 'id' in response: - return await self._async_monitor_job_status(response['id'], headers) + if response.status_code == 200: + try: + id = response.json().get('id') + except: + raise Exception(f'Failed to parse Firecrawl response as JSON.') + return self._monitor_job_status(id, headers, poll_interval) else: - raise Exception(f'Failed to start batch scrape. 
Error: {response.get("error")}')
+            self._handle_error(response, 'start batch scrape job')
+
     async def async_batch_scrape_urls(
-            self,
-            urls: List[str],
-            params: Optional[ScrapeParams] = None,
-            idempotency_key: Optional[str] = None) -> BatchScrapeResponse:
+        self,
+        urls: List[str],
+        *,
+        formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None,
+        headers: Optional[Dict[str, str]] = None,
+        include_tags: Optional[List[str]] = None,
+        exclude_tags: Optional[List[str]] = None,
+        only_main_content: Optional[bool] = None,
+        wait_for: Optional[int] = None,
+        timeout: Optional[int] = None,
+        location: Optional[LocationConfig] = None,
+        mobile: Optional[bool] = None,
+        skip_tls_verification: Optional[bool] = None,
+        remove_base64_images: Optional[bool] = None,
+        block_ads: Optional[bool] = None,
+        proxy: Optional[Literal["basic", "stealth"]] = None,
+        extract: Optional[ExtractConfig] = None,
+        json_options: Optional[ExtractConfig] = None,
+        actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
+        agent: Optional[AgentOptions] = None,
+        idempotency_key: Optional[str] = None,
+        **kwargs
+    ) -> BatchScrapeResponse:
         """
-        Initiate an asynchronous batch scrape job without waiting for completion.
+        Initiate a batch scrape job asynchronously.
 
         Args:
-            urls (List[str]): List of URLs to scrape
-            params (Optional[ScrapeParams]): See ScrapeParams model for configuration:
-                Content Options:
-                  * formats - Content formats to retrieve
-                  * includeTags - HTML tags to include
-                  * excludeTags - HTML tags to exclude
-                  * onlyMainContent - Extract main content only
-
-                Request Options:
-                  * headers - Custom HTTP headers
-                  * timeout - Request timeout (ms)
-                  * mobile - Use mobile user agent
-                  * proxy - Proxy type
-
-                Extraction Options:
-                  * extract - Content extraction config
-                  * jsonOptions - JSON extraction config
-                  * actions - Actions to perform
-            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
+            urls (List[str]): URLs to scrape
+            formats (Optional[List[Literal]]): Content formats to retrieve
+            headers (Optional[Dict[str, str]]): Custom HTTP headers
+            include_tags (Optional[List[str]]): HTML tags to include
+            exclude_tags (Optional[List[str]]): HTML tags to exclude
+            only_main_content (Optional[bool]): Extract main content only
+            wait_for (Optional[int]): Wait time in milliseconds
+            timeout (Optional[int]): Request timeout in milliseconds
+            location (Optional[LocationConfig]): Location configuration
+            mobile (Optional[bool]): Use mobile user agent
+            skip_tls_verification (Optional[bool]): Skip TLS verification
+            remove_base64_images (Optional[bool]): Remove base64 encoded images
+            block_ads (Optional[bool]): Block advertisements
+            proxy (Optional[Literal]): Proxy type to use
+            extract (Optional[ExtractConfig]): Content extraction config
+            json_options (Optional[ExtractConfig]): JSON extraction config
+            actions (Optional[List[Union]]): Actions to perform
+            agent (Optional[AgentOptions]): Agent configuration
+            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
+            **kwargs: Additional parameters to pass to the API
 
         Returns:
-            BatchScrapeResponse with:
-            * success - Whether job started successfully
-            * id - Unique identifier for the job
-            * url - Status check URL
-            * error - Error message if start failed
+            BatchScrapeResponse with:
+            * success - Whether job started successfully
+            * id - Unique identifier for the job
+            * url - Status check URL
+            * error - Error message if start failed
 
         Raises:
-            Exception: If job initiation fails
+            Exception: If job initiation fails
         """
-        headers = self._prepare_headers(idempotency_key)
-        json_data = {'urls': urls}
-        if params:
-            json_data.update(params)
-        json_data['origin'] = f"python-sdk@{version}"
+        scrape_params = {}
 
-        endpoint = f'/v1/batch/scrape'
-        return await self._async_post_request(
-            f'{self.api_url}{endpoint}',
-            json_data,
+        # Add individual parameters
+        if formats is not None:
+            scrape_params['formats'] = formats
+        if headers is not None:
+            scrape_params['headers'] = headers
+        if include_tags is not None:
+            scrape_params['includeTags'] = include_tags
+        if exclude_tags is not None:
+            scrape_params['excludeTags'] = exclude_tags
+        if only_main_content is not None:
+            scrape_params['onlyMainContent'] = only_main_content
+        if wait_for is not None:
+            scrape_params['waitFor'] = wait_for
+        if timeout is not None:
+            scrape_params['timeout'] = timeout
+        if location is not None:
+            scrape_params['location'] = location.dict(exclude_none=True)
+        if mobile is not None:
+            scrape_params['mobile'] = mobile
+        if skip_tls_verification is not None:
+            scrape_params['skipTlsVerification'] = skip_tls_verification
+        if remove_base64_images is not None:
+            scrape_params['removeBase64Images'] = remove_base64_images
+        if block_ads is not None:
+            scrape_params['blockAds'] = block_ads
+        if proxy is not None:
+            scrape_params['proxy'] = proxy
+        if extract is not None:
+            if hasattr(extract.schema, 'schema'):
+                extract.schema = extract.schema.schema()
+            scrape_params['extract'] = extract.dict(exclude_none=True)
+        if json_options is not None:
+            if hasattr(json_options.schema, 'schema'):
+                json_options.schema = json_options.schema.schema()
+            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+        if actions is not None:
+            scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
+        if agent is not None:
+            scrape_params['agent'] = agent.dict(exclude_none=True)
+
+        # Add any additional kwargs
+        scrape_params.update(kwargs)
+
+        # Create final params object
+        final_params = ScrapeParams(**scrape_params)
+        params_dict = final_params.dict(exclude_none=True)
+        params_dict['urls'] = urls
+        params_dict['origin'] = f"python-sdk@{version}"
+
+        # Make request
+        headers = self._prepare_headers(idempotency_key)
+        response = await self._async_post_request(
+            f'{self.api_url}/v1/batch/scrape',
+            params_dict,
             headers
         )
+        if response.status_code == 200:
+            try:
+                return BatchScrapeResponse(**response.json())
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, 'start batch scrape job')
+
     async def crawl_url(
-            self,
-            url: str,
-            params: Optional[CrawlParams] = None,
-            poll_interval: int = 2,
-            idempotency_key: Optional[str] = None) -> CrawlStatusResponse:
+        self,
+        url: str,
+        *,
+        include_paths: Optional[List[str]] = None,
+        exclude_paths: Optional[List[str]] = None,
+        max_depth: Optional[int] = None,
+        max_discovery_depth: Optional[int] = None,
+        limit: Optional[int] = None,
+        allow_backward_links: Optional[bool] = None,
+        allow_external_links: Optional[bool] = None,
+        ignore_sitemap: Optional[bool] = None,
+        scrape_options: Optional[CommonOptions] = None,
+        webhook: Optional[Union[str, WebhookConfig]] = None,
+        deduplicate_similar_urls: Optional[bool] = None,
+        ignore_query_parameters: Optional[bool] = None,
+        regex_on_full_url: Optional[bool] = None,
+        poll_interval: Optional[int] = 2,
+        idempotency_key: Optional[str] = None,
+        **kwargs
+    ) -> CrawlStatusResponse:
         """
-        Asynchronously crawl a website starting from a URL and monitor until completion.
-
-        Args:
-            url (str): Target URL to start crawling from
-            params (Optional[CrawlParams]): See CrawlParams model:
-                URL Discovery:
-                  * includePaths - Patterns of URLs to include
-                  * excludePaths - Patterns of URLs to exclude
-                  * maxDepth - Maximum crawl depth
-                  * maxDiscoveryDepth - Maximum depth for finding new URLs
-                  * limit - Maximum pages to crawl
-
-                Link Following:
-                  * allowBackwardLinks - Follow parent directory links
-                  * allowExternalLinks - Follow external domain links
-                  * ignoreSitemap - Skip sitemap.xml processing
-
-                Advanced:
-                  * scrapeOptions - Page scraping configuration
-                  * webhook - Notification webhook settings
-                  * deduplicateSimilarURLs - Remove similar URLs
-                  * ignoreQueryParameters - Ignore URL parameters
-                  * regexOnFullURL - Apply regex to full URLs
-            poll_interval (int): Seconds between status checks (default: 2)
-            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
-
-        Returns:
-            CrawlStatusResponse with:
-            * Crawling status and progress
-            * Crawled page contents
-            * Success/error information
-
-        Raises:
-            Exception: If crawl fails
-        """
-        headers = self._prepare_headers(idempotency_key)
-        json_data = {'url': url}
-        if params:
-            json_data.update(params)
-        json_data['origin'] = f"python-sdk@{version}"
-
-        endpoint = f'/v1/crawl'
-        response = await self._async_post_request(
-            f'{self.api_url}{endpoint}',
-            json_data,
-            headers
-        )
-
-        if response.get('success') and 'id' in response:
-            return await self._async_monitor_job_status(response['id'], headers, poll_interval)
-        else:
-            raise Exception(f'Failed to start crawl. Error: {response.get("error")}')
-
-    async def async_crawl_url(
-            self,
-            url: str,
-            params: Optional[CrawlParams] = None,
-            idempotency_key: Optional[str] = None) -> CrawlResponse:
-        """
-        Initiate an asynchronous crawl job without waiting for completion.
+        Crawl a website starting from a URL.
 
         Args:
             url (str): Target URL to start crawling from
-            params (Optional[CrawlParams]): See CrawlParams model:
-                URL Discovery:
-                  * includePaths - Patterns of URLs to include
-                  * excludePaths - Patterns of URLs to exclude
-                  * maxDepth - Maximum crawl depth
-                  * maxDiscoveryDepth - Maximum depth for finding new URLs
-                  * limit - Maximum pages to crawl
-
-                Link Following:
-                  * allowBackwardLinks - Follow parent directory links
-                  * allowExternalLinks - Follow external domain links
-                  * ignoreSitemap - Skip sitemap.xml processing
-
-                Advanced:
-                  * scrapeOptions - Page scraping configuration
-                  * webhook - Notification webhook settings
-                  * deduplicateSimilarURLs - Remove similar URLs
-                  * ignoreQueryParameters - Ignore URL parameters
-                  * regexOnFullURL - Apply regex to full URLs
+            include_paths (Optional[List[str]]): Patterns of URLs to include
+            exclude_paths (Optional[List[str]]): Patterns of URLs to exclude
+            max_depth (Optional[int]): Maximum crawl depth
+            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
+            limit (Optional[int]): Maximum pages to crawl
+            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_external_links (Optional[bool]): Follow external domain links
+            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
+            scrape_options (Optional[CommonOptions]): Page scraping configuration
+            webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
+            deduplicate_similar_urls (Optional[bool]): Remove similar URLs
+            ignore_query_parameters (Optional[bool]): Ignore URL parameters
+            regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            poll_interval (Optional[int]): Seconds between status checks (default: 2)
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
+            **kwargs: Additional parameters to pass to the API
 
         Returns:
-            CrawlResponse with:
-            * success - Whether job started successfully
-            * id - Unique identifier for the job
-            * url - Status check URL
-            * error - Error message if start failed
+            CrawlStatusResponse with:
+            * Crawling status and progress
+            * Crawled page contents
+            * Success/error information
 
         Raises:
-            Exception: If job initiation fails
+            Exception: If crawl fails
         """
-        headers = self._prepare_headers(idempotency_key)
-        json_data = {'url': url}
-        if params:
-            json_data.update(params)
-        json_data['origin'] = f"python-sdk@{version}"
+        crawl_params = {}
 
-        endpoint = f'/v1/crawl'
-        return await self._async_post_request(
-            f'{self.api_url}{endpoint}',
-            json_data,
-            headers
+        # Add individual parameters
+        if include_paths is not None:
+            crawl_params['includePaths'] = include_paths
+        if exclude_paths is not None:
+            crawl_params['excludePaths'] = exclude_paths
+        if max_depth is not None:
+            crawl_params['maxDepth'] = max_depth
+        if max_discovery_depth is not None:
+            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
+        if limit is not None:
+            crawl_params['limit'] = limit
+        if allow_backward_links is not None:
+            crawl_params['allowBackwardLinks'] = allow_backward_links
+        if allow_external_links is not None:
+            crawl_params['allowExternalLinks'] = allow_external_links
+        if ignore_sitemap is not None:
+            crawl_params['ignoreSitemap'] = ignore_sitemap
+        if scrape_options is not None:
+            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+        if webhook is not None:
+            crawl_params['webhook'] = webhook
+        if deduplicate_similar_urls is not None:
+            crawl_params['deduplicateSimilarURLs'] = deduplicate_similar_urls
+        if ignore_query_parameters is not None:
+            crawl_params['ignoreQueryParameters'] = ignore_query_parameters
+        if regex_on_full_url is not None:
+            crawl_params['regexOnFullURL'] = regex_on_full_url
+
+        # Add any additional kwargs
+        crawl_params.update(kwargs)
+
+        # Create final params object
+        final_params = CrawlParams(**crawl_params)
+        params_dict = final_params.dict(exclude_none=True)
+        params_dict['url'] = url
+        params_dict['origin'] = f"python-sdk@{version}"
+
+        # Make request
+        headers = self._prepare_headers(idempotency_key)
+        response = await self._async_post_request(
+            f'{self.api_url}/v1/crawl', params_dict, headers)
+
+        if response.status_code == 200:
+            try:
+                id = response.json().get('id')
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+            return await self._async_monitor_job_status(id, headers, poll_interval)
+        else:
+            self._handle_error(response, 'start crawl job')
+
+
+    async def async_crawl_url(
+        self,
+        url: str,
+        *,
+        include_paths: Optional[List[str]] = None,
+        exclude_paths: Optional[List[str]] = None,
+        max_depth: Optional[int] = None,
+        max_discovery_depth: Optional[int] = None,
+        limit: Optional[int] = None,
+        allow_backward_links: Optional[bool] = None,
+        allow_external_links: Optional[bool] = None,
+        ignore_sitemap: Optional[bool] = None,
+        scrape_options: Optional[CommonOptions] = None,
+        webhook: Optional[Union[str, WebhookConfig]] = None,
+        deduplicate_similar_urls: Optional[bool] = None,
+        ignore_query_parameters: Optional[bool] = None,
+        regex_on_full_url: Optional[bool] = None,
+        idempotency_key: Optional[str] = None,
+        **kwargs
+    ) -> CrawlResponse:
+        """
+        Start an asynchronous crawl job.
+
+        Args:
+            url (str): Target URL to start crawling from
+            include_paths (Optional[List[str]]): Patterns of URLs to include
+            exclude_paths (Optional[List[str]]): Patterns of URLs to exclude
+            max_depth (Optional[int]): Maximum crawl depth
+            max_discovery_depth (Optional[int]): Maximum depth for finding new URLs
+            limit (Optional[int]): Maximum pages to crawl
+            allow_backward_links (Optional[bool]): Follow parent directory links
+            allow_external_links (Optional[bool]): Follow external domain links
+            ignore_sitemap (Optional[bool]): Skip sitemap.xml processing
+            scrape_options (Optional[CommonOptions]): Page scraping configuration
+            webhook (Optional[Union[str, WebhookConfig]]): Notification webhook settings
+            deduplicate_similar_urls (Optional[bool]): Remove similar URLs
+            ignore_query_parameters (Optional[bool]): Ignore URL parameters
+            regex_on_full_url (Optional[bool]): Apply regex to full URLs
+            idempotency_key (Optional[str]): Unique key to prevent duplicate requests
+            **kwargs: Additional parameters to pass to the API
+
+        Returns:
+            CrawlResponse with:
+            * success - Whether crawl started successfully
+            * id - Unique identifier for the crawl job
+            * url - Status check URL for the crawl
+            * error - Error message if start failed
+
+        Raises:
+            Exception: If crawl initiation fails
+        """
+        crawl_params = {}
+
+        # Add individual parameters
+        if include_paths is not None:
+            crawl_params['includePaths'] = include_paths
+        if exclude_paths is not None:
+            crawl_params['excludePaths'] = exclude_paths
+        if max_depth is not None:
+            crawl_params['maxDepth'] = max_depth
+        if max_discovery_depth is not None:
+            crawl_params['maxDiscoveryDepth'] = max_discovery_depth
+        if limit is not None:
+            crawl_params['limit'] = limit
+        if allow_backward_links is not None:
+            crawl_params['allowBackwardLinks'] = allow_backward_links
+        if allow_external_links is not None:
+            crawl_params['allowExternalLinks'] = allow_external_links
+        if ignore_sitemap is not None:
+            crawl_params['ignoreSitemap'] = ignore_sitemap
+        if scrape_options is not None:
+            crawl_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+        if webhook is not None:
+            crawl_params['webhook'] = webhook
+        if deduplicate_similar_urls is not None:
+            crawl_params['deduplicateSimilarURLs'] = deduplicate_similar_urls
+        if ignore_query_parameters is not None:
+            crawl_params['ignoreQueryParameters'] = ignore_query_parameters
+        if regex_on_full_url is not None:
+            crawl_params['regexOnFullURL'] = regex_on_full_url
+
+        # Add any additional kwargs
+        crawl_params.update(kwargs)
+
+        # Create final params object
+        final_params = CrawlParams(**crawl_params)
+        params_dict = final_params.dict(exclude_none=True)
+        params_dict['url'] = url
+        params_dict['origin'] = f"python-sdk@{version}"
+
+        # Make request
+        headers = self._prepare_headers(idempotency_key)
+        response = await self._async_post_request(
+            f'{self.api_url}/v1/crawl',
+            params_dict,
+            headers
         )
+        if response.status_code == 200:
+            try:
+                return CrawlResponse(**response.json())
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, 'start crawl job')
+
     async def check_crawl_status(self, id: str) -> CrawlStatusResponse:
         """
         Check the status and results of an asynchronous crawl job.

From 9ba1ae9ae13c7b4892712504f9f3bb7b274fa173 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Fri, 18 Apr 2025 01:28:31 -0700
Subject: [PATCH 24/26] Nick:

---
 apps/api/src/routes/v1.ts              |  4 ++--
 apps/python-sdk/firecrawl/firecrawl.py | 32 ++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index 185a70de..271f2b17 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -278,14 +278,14 @@ v1Router.get(
 
 v1Router.post(
   "/deep-research",
-  authMiddleware(RateLimiterMode.Extract),
+  authMiddleware(RateLimiterMode.Crawl),
   checkCreditsMiddleware(1),
   wrap(deepResearchController),
 );
 
 v1Router.get(
   "/deep-research/:jobId",
-  authMiddleware(RateLimiterMode.ExtractStatus),
+  authMiddleware(RateLimiterMode.CrawlStatus),
   wrap(deepResearchStatusController),
 );
 
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index 1442894e..76816a1d 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -1843,7 +1843,12 @@ class FirecrawlApp:
             __experimental_stream=experimental_stream
         )
 
-        response = self.async_generate_llms_text(url, params)
+        response = self.async_generate_llms_text(
+            url,
+            max_urls=max_urls,
+            show_full_text=show_full_text,
+            experimental_stream=experimental_stream
+        )
 
         if not response.get('success') or 'id' not in response:
             return response
@@ -2219,7 +2224,14 @@ class FirecrawlApp:
         research_params['__experimental_streamSteps'] = __experimental_stream_steps
         research_params = DeepResearchParams(**research_params)
 
-        response = self.async_deep_research(query, research_params)
+        response = self.async_deep_research(
+            query,
+            max_depth=max_depth,
+            time_limit=time_limit,
+            max_urls=max_urls,
+            analysis_prompt=analysis_prompt,
+            system_prompt=system_prompt
+        )
 
         if not response.get('success') or 'id' not in response:
             return response
@@ -3529,7 +3541,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if experimental_stream is not None:
             params['__experimental_stream'] = experimental_stream
 
-        response = await self.async_generate_llms_text(url, params)
+        response = await self.async_generate_llms_text(
+            url,
+            max_urls=max_urls,
+            show_full_text=show_full_text,
+            experimental_stream=experimental_stream
+        )
 
         if not response.get('success') or 'id' not in response:
             return response
@@ -3678,7 +3695,14 @@ class AsyncFirecrawlApp(FirecrawlApp):
         research_params['__experimental_streamSteps'] = __experimental_stream_steps
         research_params = DeepResearchParams(**research_params)
 
-        response = await self.async_deep_research(query, research_params)
+        response = await self.async_deep_research(
+            query,
+            max_depth=max_depth,
+            time_limit=time_limit,
+            max_urls=max_urls,
+            analysis_prompt=analysis_prompt,
+            system_prompt=system_prompt
+        )
 
         if not response.get('success') or 'id' not in response:
             return response

From 9e67d7ba22ba9956d3e02d629d203d58296abbe4 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Fri, 18 Apr 2025 01:30:40 -0700
Subject: [PATCH 25/26] Nick:

---
 apps/python-sdk/pyproject.toml | 3 ++-
 apps/python-sdk/setup.py       | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml
index 5a87d8c5..0483c31c 100644
--- a/apps/python-sdk/pyproject.toml
+++ b/apps/python-sdk/pyproject.toml
@@ -13,7 +13,8 @@ dependencies = [
     "python-dotenv",
     "websockets",
     "nest-asyncio",
-    "pydantic>=2.10.3",
+    "pydantic",
+    "aiohttp"
 ]
 authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
 maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
index 8a67d1fd..1fb31664 100644
--- a/apps/python-sdk/setup.py
+++ b/apps/python-sdk/setup.py
@@ -32,7 +32,9 @@ setup(
         'python-dotenv',
         'websockets',
         'asyncio',
-        'nest-asyncio'
+        'nest-asyncio',
+        'pydantic',
+        'aiohttp'
     ],
     python_requires=">=3.8",
     classifiers=[

From 06c54bc41cb97590be44221ec82c74fe8d3c2bda Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Fri, 18 Apr 2025 01:43:18 -0700
Subject: [PATCH 26/26] Update __init__.py

---
 apps/python-sdk/firecrawl/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index 10431768..eea9ba54 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
 import logging
 import os
 
-from .firecrawl import FirecrawlApp # noqa
+from .firecrawl import FirecrawlApp, ExtractConfig # noqa
 
-__version__ = "2.0.0"
+__version__ = "2.0.1"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
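
For reference, a minimal usage sketch of the keyword-argument API introduced by this series. It is illustrative only and not part of any patch above: it assumes AsyncFirecrawlApp can be imported from firecrawl.firecrawl and is constructed with api_key, and the API key and URLs are placeholders; the parameter and field names follow the signatures and response models shown in the diffs.

# Illustrative sketch only -- not part of the patches above.
# Assumptions: AsyncFirecrawlApp is importable from firecrawl.firecrawl and its
# constructor accepts api_key; the key and URLs below are placeholders.
import asyncio

from firecrawl.firecrawl import AsyncFirecrawlApp


async def main() -> None:
    app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")

    # crawl_url waits for the crawl to finish and returns a CrawlStatusResponse.
    status = await app.crawl_url(
        "https://example.com",
        limit=5,
        max_depth=2,
        poll_interval=2,
    )
    print(status.status, f"{status.completed}/{status.total}")

    # async_crawl_url only starts the job and returns a CrawlResponse with an id.
    job = await app.async_crawl_url("https://example.com", limit=5)
    if job.success and job.id:
        print("crawl started:", job.id)

    # async_batch_scrape_urls starts a batch scrape job for several URLs.
    batch = await app.async_batch_scrape_urls(
        ["https://example.com/a", "https://example.com/b"],
        formats=["markdown"],
        only_main_content=True,
    )
    print("batch scrape started:", batch.id)


asyncio.run(main())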