diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 38495f3e..1d1715ed 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -314,6 +314,26 @@ export interface SearchResponse {
   error?: string;
 }
 
+/**
+ * Response interface for crawl/batch scrape error monitoring.
+ */
+export interface CrawlErrorsResponse {
+  /**
+   * Scrapes that errored out + error details
+   */
+  errors: {
+    id: string,
+    timestamp?: string,
+    url: string,
+    error: string,
+  }[];
+
+  /**
+   * URLs blocked by robots.txt
+   */
+  robotsBlocked: string[];
+};
+
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -621,6 +641,29 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
 
+  /**
+   * Returns information about crawl errors.
+   * @param id - The ID of the crawl operation.
+   * @returns Information about crawl errors.
+   */
+  async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/crawl/${id}/errors`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "check crawl errors");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
   /**
    * Cancels a crawl job using the Firecrawl API.
    * @param id - The ID of the crawl operation.
@@ -883,6 +926,29 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
 
+  /**
+   * Returns information about batch scrape errors.
+   * @param id - The ID of the batch scrape operation.
+   * @returns Information about batch scrape errors.
+   */
+  async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "check batch scrape errors");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
   /**
    * Extracts information from URLs using the Firecrawl API.
    * Currently in Beta. Expect breaking changes on future minor versions.
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index 94539c2a..078e6a56 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -309,6 +309,26 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check crawl status')
 
+    def check_crawl_errors(self, id: str) -> Dict[str, Any]:
+        """
+        Returns information about crawl errors.
+
+        Args:
+            id (str): The ID of the crawl job.
+
+        Returns:
+            Dict[str, Any]: Information about crawl errors.
+        """
+        headers = self._prepare_headers()
+        response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
+        if response.status_code == 200:
+            try:
+                return response.json()
+            except Exception:
+                raise Exception('Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, "check crawl errors")
+
     def cancel_crawl(self, id: str) -> Dict[str, Any]:
         """
         Cancel an asynchronous crawl job using the Firecrawl API.
@@ -546,6 +566,26 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check batch scrape status')
 
+    def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
+        """
+        Returns information about batch scrape errors.
+
+        Args:
+            id (str): The ID of the batch scrape job.
+
+        Returns:
+            Dict[str, Any]: Information about batch scrape errors.
+        """
+        headers = self._prepare_headers()
+        response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
+        if response.status_code == 200:
+            try:
+                return response.json()
+            except Exception:
+                raise Exception('Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, "check batch scrape errors")
+
     def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
         """
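A minimal, hypothetical usage sketch of the new JS SDK methods follows (not part of the diff). The API key, target URL, and crawl limit are placeholders, and it assumes the SDK's existing asyncCrawlUrl helper for obtaining a crawl ID.

// Hypothetical usage sketch; apiKey, URL, and limit are placeholders.
import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

async function main() {
  // Start an async crawl so we have an ID to poll.
  const crawl = await app.asyncCrawlUrl("https://example.com", { limit: 5 });
  if (!("id" in crawl) || !crawl.id) {
    throw new Error("crawl did not return an ID");
  }

  // New in this diff: checkCrawlErrors hits GET /v1/crawl/{id}/errors.
  const report = await app.checkCrawlErrors(crawl.id);
  if ("errors" in report) {
    for (const e of report.errors) {
      console.log(`${e.url} failed: ${e.error}`);
    }
    console.log("robots.txt blocked:", report.robotsBlocked);
  }
}

main();

checkBatchScrapeErrors follows the same pattern against /v1/batch/scrape/{id}/errors, and the Python check_crawl_errors / check_batch_scrape_errors methods return the same JSON payload as a plain dict.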