feat(sdk): check crawl/batch scrape errors

This commit is contained in:
Gergő Móricz 2025-01-17 18:06:04 +01:00
parent dbc6d07871
commit 146dc47954
2 changed files with 105 additions and 0 deletions

View File

@ -314,6 +314,26 @@ export interface SearchResponse {
error?: string;
}
/**
 * Response interface for crawl/batch scrape error monitoring.
 * Returned by the `/v1/crawl/{id}/errors` and `/v1/batch/scrape/{id}/errors`
 * endpoints.
 */
export interface CrawlErrorsResponse {
  /**
   * Scrapes that errored out + error details.
   */
  errors: {
    /** Identifier of the failed scrape. */
    id: string;
    /** When the failure occurred (if reported by the API). */
    timestamp?: string;
    /** The URL that failed to scrape. */
    url: string;
    /** Human-readable error description. */
    error: string;
  }[];
  /**
   * URLs blocked by robots.txt.
   */
  robotsBlocked: string[];
}
/**
* Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content.
@ -621,6 +641,29 @@ export default class FirecrawlApp {
return { success: false, error: "Internal server error." };
}
/**
 * Returns information about crawl errors.
 * @param id - The ID of the crawl operation.
 * @returns Information about crawl errors (CrawlErrorsResponse), or an
 *          ErrorResponse if the request could not be completed.
 * @throws {FirecrawlError} If the underlying request fails.
 */
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    // This is a read-only endpoint, so issue a GET — matches the Python SDK,
    // which uses _get_request for the same /errors route. DELETE here looked
    // like a copy-paste from cancelCrawl.
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/crawl/${id}/errors`,
      headers
    );
    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "check crawl errors");
    }
  } catch (error: any) {
    throw new FirecrawlError(error.message, 500);
  }
  // Fallback for the non-200 path when handleError does not throw.
  return { success: false, error: "Internal server error." };
}
/**
* Cancels a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation.
@ -883,6 +926,29 @@ export default class FirecrawlApp {
return { success: false, error: "Internal server error." };
}
/**
 * Returns information about batch scrape errors.
 * @param id - The ID of the batch scrape operation.
 * @returns Information about batch scrape errors (CrawlErrorsResponse), or an
 *          ErrorResponse if the request could not be completed.
 * @throws {FirecrawlError} If the underlying request fails.
 */
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    // This is a read-only endpoint, so issue a GET — matches the Python SDK,
    // which uses _get_request for the same /errors route. DELETE here looked
    // like a copy-paste from the cancel methods.
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
      headers
    );
    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "check batch scrape errors");
    }
  } catch (error: any) {
    throw new FirecrawlError(error.message, 500);
  }
  // Fallback for the non-200 path when handleError does not throw.
  return { success: false, error: "Internal server error." };
}
/**
* Extracts information from URLs using the Firecrawl API.
* Currently in Beta. Expect breaking changes on future minor versions.

View File

@ -309,6 +309,26 @@ class FirecrawlApp:
else:
self._handle_error(response, 'check crawl status')
def check_crawl_errors(self, id: str) -> Dict[str, Any]:
    """
    Returns information about crawl errors.

    Args:
        id (str): The ID of the crawl job.

    Returns:
        Dict[str, Any]: Information about crawl errors — per the API's
            response shape, an 'errors' list and 'robotsBlocked' URLs.

    Raises:
        Exception: If the response body cannot be parsed as JSON, or if the
            API returns a non-200 status (raised via _handle_error).
    """
    headers = self._prepare_headers()
    response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
    if response.status_code == 200:
        try:
            return response.json()
        # Narrow catch: a bare `except:` would also swallow KeyboardInterrupt
        # and SystemExit. response.json() raises a ValueError subclass on
        # malformed JSON.
        except ValueError:
            raise Exception('Failed to parse Firecrawl response as JSON.')
    else:
        self._handle_error(response, "check crawl errors")
def cancel_crawl(self, id: str) -> Dict[str, Any]:
"""
Cancel an asynchronous crawl job using the Firecrawl API.
@ -546,6 +566,25 @@ class FirecrawlApp:
else:
self._handle_error(response, 'check batch scrape status')
def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
    """
    Returns information about batch scrape errors.

    Args:
        id (str): The ID of the batch scrape job.

    Returns:
        Dict[str, Any]: Information about batch scrape errors — per the API's
            response shape, an 'errors' list and 'robotsBlocked' URLs.

    Raises:
        Exception: If the response body cannot be parsed as JSON, or if the
            API returns a non-200 status (raised via _handle_error).
    """
    headers = self._prepare_headers()
    response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
    if response.status_code == 200:
        try:
            return response.json()
        # Narrow catch: a bare `except:` would also swallow KeyboardInterrupt
        # and SystemExit. response.json() raises a ValueError subclass on
        # malformed JSON.
        except ValueError:
            raise Exception('Failed to parse Firecrawl response as JSON.')
    else:
        self._handle_error(response, "check batch scrape errors")
def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
"""