Merge pull request #760 from mendableai/feat-sdks/cancel-crawl

[SDK] Cancel Crawl
Nicolas 2024-10-10 17:48:51 -03:00 committed by GitHub
commit 0d48f4513a
4 changed files with 104 additions and 3 deletions

apps/js-sdk/firecrawl/package.json

@@ -1,6 +1,6 @@
 {
-  "name": "@mendable/firecrawl-js",
-  "version": "1.5.3",
+  "name": "firecrawl",
+  "version": "1.6.1",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

apps/js-sdk/firecrawl/src/index.ts

@@ -421,6 +421,36 @@ export default class FirecrawlApp {
       return { success: false, error: "Internal server error." };
     }
 
+  /**
+   * Cancels a crawl job using the Firecrawl API.
+   * @param id - The ID of the crawl operation.
+   * @returns The response from the cancel crawl operation.
+   */
+  async cancelCrawl(id: string): Promise<ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.deleteRequest(
+        `${this.apiUrl}/v1/crawl/${id}`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "cancel crawl job");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket.
+   * @param url - The URL to crawl.
+   * @param params - Additional parameters for the crawl request.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns A CrawlWatcher instance to monitor the crawl job.
+   */
   async crawlUrlAndWatch(
     url: string,
     params?: CrawlParams,

@@ -436,6 +466,12 @@ export default class FirecrawlApp {
     throw new FirecrawlError("Crawl job failed to start", 400);
   }
 
+  /**
+   * Maps a URL using the Firecrawl API.
+   * @param url - The URL to map.
+   * @param params - Additional parameters for the map request.
+   * @returns The response from the map operation.
+   */
   async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
     let jsonData: { url: string } & MapParams = { url, ...params };

@@ -506,6 +542,27 @@ export default class FirecrawlApp {
     }
   }
 
+  /**
+   * Sends a DELETE request to the specified URL.
+   * @param url - The URL to send the request to.
+   * @param headers - The headers for the request.
+   * @returns The response from the DELETE request.
+   */
+  async deleteRequest(
+    url: string,
+    headers: AxiosRequestHeaders
+  ): Promise<AxiosResponse> {
+    try {
+      return await axios.delete(url, { headers });
+    } catch (error) {
+      if (error instanceof AxiosError && error.response) {
+        return error.response as AxiosResponse;
+      } else {
+        throw error;
+      }
+    }
+  }
+
   /**
    * Monitors the status of a crawl job until completion or failure.
    * @param id - The ID of the crawl operation.

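From the consumer's side, the new cancelCrawl method is called on a FirecrawlApp instance and issues the DELETE {apiUrl}/v1/crawl/{id} request shown above. A minimal usage sketch in TypeScript — asyncCrawlUrl and its { success, id } response shape come from the surrounding v1 SDK, not from this PR, and the crawl URL and limit are illustrative; the import uses "firecrawl", the package name this PR sets in package.json:

import FirecrawlApp from "firecrawl";

async function main() {
  const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

  // Start a crawl without blocking, keeping the job id for later.
  const started = await app.asyncCrawlUrl("https://example.com", { limit: 100 });
  if (started.success && started.id) {
    // cancelCrawl sends DELETE {apiUrl}/v1/crawl/{id} and returns the
    // API's JSON body on a 200 response.
    const result = await app.cancelCrawl(started.id);
    console.log(result);
  }
}

main().catch(console.error);

Note that deleteRequest hands error statuses back as AxiosResponse objects instead of throwing, so cancelCrawl can route a non-200 through handleError; only errors without a response (e.g. network failures) reach its catch block.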
apps/python-sdk/firecrawl/__init__.py

@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp
 
-__version__ = "1.2.4"
+__version__ = "1.3.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")

apps/python-sdk/firecrawl/firecrawl.py

@@ -192,6 +192,23 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check crawl status')
 
+    def cancel_crawl(self, id: str) -> Dict[str, Any]:
+        """
+        Cancel an asynchronous crawl job using the Firecrawl API.
+
+        Args:
+            id (str): The ID of the crawl job to cancel.
+
+        Returns:
+            Dict[str, Any]: The response from the cancel crawl request.
+        """
+        headers = self._prepare_headers()
+        response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
+        if response.status_code == 200:
+            return response.json()
+        else:
+            self._handle_error(response, "cancel crawl job")
+
     def crawl_url_and_watch(self, url: str, params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
         """
         Initiate a crawl job and return a CrawlWatcher to monitor the job via WebSocket.

@@ -322,6 +339,33 @@ class FirecrawlApp:
                 return response
         return response
 
+    def _delete_request(self, url: str,
+                        headers: Dict[str, str],
+                        retries: int = 3,
+                        backoff_factor: float = 0.5) -> requests.Response:
+        """
+        Make a DELETE request with retries.
+
+        Args:
+            url (str): The URL to send the DELETE request to.
+            headers (Dict[str, str]): The headers to include in the DELETE request.
+            retries (int): Number of retries for the request.
+            backoff_factor (float): Backoff factor for retries.
+
+        Returns:
+            requests.Response: The response from the DELETE request.
+
+        Raises:
+            requests.RequestException: If the request fails after the specified retries.
+        """
+        for attempt in range(retries):
+            response = requests.delete(url, headers=headers)
+            if response.status_code == 502:
+                time.sleep(backoff_factor * (2 ** attempt))
+            else:
+                return response
+        return response
+
     def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: int) -> Any:
         """
         Monitor the status of a crawl job until completion.
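Unlike the JS deleteRequest, the Python _delete_request retries on HTTP 502 (a transient bad gateway), sleeping backoff_factor * 2 ** attempt seconds between attempts — 0.5 s, 1 s, then 2 s with the defaults — and returning the last 502 response if every retry is exhausted. For comparison, a TypeScript sketch of the same policy; deleteWithRetries is a hypothetical helper, not part of either SDK:

import axios, { AxiosError, AxiosResponse } from "axios";

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

// Mirrors the Python _delete_request: retry on 502 with exponential
// backoff, return any other response to the caller. axios throws on
// non-2xx statuses by default, so error statuses surface in the catch.
async function deleteWithRetries(
  url: string,
  headers: Record<string, string>,
  retries = 3,
  backoffFactor = 0.5
): Promise<AxiosResponse> {
  let last: AxiosResponse | undefined;
  for (let attempt = 0; attempt < retries; attempt++) {
    try {
      return await axios.delete(url, { headers });
    } catch (error) {
      if (error instanceof AxiosError && error.response) {
        if (error.response.status === 502) {
          last = error.response;
          await sleep(backoffFactor * 2 ** attempt * 1000); // 0.5 s, 1 s, 2 s
          continue;
        }
        return error.response; // non-502 error statuses are returned, as in Python
      }
      throw error; // no response at all (network failure): propagate
    }
  }
  return last!; // retries exhausted: return the last 502, as in Python
}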