From 2689ffa748780b40487ed4172735653bd896877c Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:08:08 -0300
Subject: [PATCH 1/3] feat-sdk/cancel-crawl

---
 apps/js-sdk/firecrawl/src/index.ts     | 63 ++++++++++++++++++++++++++
 apps/python-sdk/firecrawl/firecrawl.py | 45 ++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index a6215037..8b60cf86 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -421,6 +421,42 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
 
+  /**
+   * Cancels a crawl job using the Firecrawl API.
+   * @param id - The ID of the crawl operation.
+   * @returns The response from the cancel crawl operation.
+   * @throws FirecrawlError if the request fails; an existing FirecrawlError keeps its status code.
+   */
+  async cancelCrawl(id: string): Promise {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.deleteRequest(
+        `${this.apiUrl}/v1/crawl/${id}`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "cancel crawl job");
+      }
+    } catch (error: unknown) {
+      // Rethrow FirecrawlError untouched so its original status code is not replaced by 500.
+      if (error instanceof FirecrawlError) {
+        throw error;
+      }
+      const message = error instanceof Error ? error.message : String(error);
+      throw new FirecrawlError(message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket.
+   * @param url - The URL to crawl.
+   * @param params - Additional parameters for the crawl request.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns A CrawlWatcher instance to monitor the crawl job.
+   */
   async crawlUrlAndWatch(
     url: string,
     params?: CrawlParams,
@@ -436,6 +472,12 @@ export default class FirecrawlApp {
     throw new FirecrawlError("Crawl job failed to start", 400);
   }
 
+  /**
+   * Maps a URL using the Firecrawl API.
+   * @param url - The URL to map.
+   * @param params - Additional parameters for the map request.
+   * @returns The response from the map operation.
+   */
   async mapUrl(url: string, params?: MapParams): Promise {
     const headers = this.prepareHeaders();
     let jsonData: { url: string } & MapParams = { url, ...params };
@@ -506,6 +548,27 @@ export default class FirecrawlApp {
     }
   }
 
+  /**
+   * Sends a DELETE request to the specified URL.
+   * @param url - The URL to send the request to.
+   * @param headers - The headers for the request.
+   * @returns The response from the DELETE request; HTTP error responses are returned, not thrown.
+   */
+  async deleteRequest(
+    url: string,
+    headers: AxiosRequestHeaders
+  ): Promise {
+    try {
+      return await axios.delete(url, { headers });
+    } catch (error) {
+      if (error instanceof AxiosError && error.response) {
+        return error.response as AxiosResponse;
+      } else {
+        throw error;
+      }
+    }
+  }
+
   /**
    * Monitors the status of a crawl job until completion or failure.
    * @param id - The ID of the crawl operation.
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index 97f4e04f..f153b034 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -191,6 +191,23 @@ class FirecrawlApp:
             }
         else:
             self._handle_error(response, 'check crawl status')
+
+    def cancel_crawl(self, id: str) -> Dict[str, Any]:
+        """
+        Cancel an asynchronous crawl job using the Firecrawl API.
+
+        Args:
+            id (str): The ID of the crawl job to cancel.
+
+        Returns:
+            Dict[str, Any]: The response from the cancel crawl request.
+        """
+        headers = self._prepare_headers()
+        response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
+        if response.status_code == 200:
+            return response.json()
+        else:
+            self._handle_error(response, 'cancel crawl job')
 
     def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
         """
@@ -321,6 +338,34 @@ class FirecrawlApp:
             else:
                 return response
         return response
+
+    def _delete_request(self, url: str,
+                        headers: Dict[str, str],
+                        retries: int = 3,
+                        backoff_factor: float = 0.5) -> requests.Response:
+        """
+        Make a DELETE request, retrying on HTTP 502 with exponential backoff.
+
+        Args:
+            url (str): The URL to send the DELETE request to.
+            headers (Dict[str, str]): The headers to include in the DELETE request.
+            retries (int): Maximum number of attempts; at least one request is always sent.
+            backoff_factor (float): Base delay in seconds, doubled after each 502 response.
+
+        Returns:
+            requests.Response: The first non-502 response, or the response to the final attempt.
+
+        Raises:
+            requests.RequestException: If the underlying request fails; such errors are not retried.
+        """
+        attempts = max(retries, 1)  # never skip the request, even for retries <= 0
+        for attempt in range(attempts - 1):
+            response = requests.delete(url, headers=headers)
+            if response.status_code != 502:
+                return response
+            time.sleep(backoff_factor * (2 ** attempt))
+        # Final attempt: return whatever comes back, with no pointless sleep afterwards.
+        return requests.delete(url, headers=headers)
 
     def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
         """

From 2cde877342fd1d4cb24dfe3e34d2dbb5bcbffcf7 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Thu, 10 Oct 2024 17:44:27 -0300
Subject: [PATCH 2/3] Nick: version bump

---
 apps/js-sdk/firecrawl/package.json    | 2 +-
 apps/python-sdk/firecrawl/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index d4f4f41a..114e9a06 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.5.3",
+  "version": "1.6.0",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index 540ce67e..3cea54ce 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp
 
-__version__ = "1.2.4"
+__version__ = "1.3.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")

From bfed65d443d427b0fa164b215bc6d6ca52461996 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Thu, 10 Oct 2024 17:46:49 -0300
Subject: [PATCH 3/3] Update package.json

---
 apps/js-sdk/firecrawl/package.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index 114e9a06..e50205d5 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
-  "name": "@mendable/firecrawl-js",
-  "version": "1.6.0",
+  "name": "firecrawl",
+  "version": "1.6.1",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",