mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 04:39:03 +08:00
feat(sdk): check crawl/batch scrape errors
This commit is contained in:
parent
dbc6d07871
commit
146dc47954
@ -314,6 +314,26 @@ export interface SearchResponse {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for crawl/batch scrape error monitoring.
|
||||
*/
|
||||
export interface CrawlErrorsResponse {
|
||||
/**
|
||||
* Scrapes that errored out + error details
|
||||
*/
|
||||
errors: {
|
||||
id: string,
|
||||
timestamp?: string,
|
||||
url: string,
|
||||
error: string,
|
||||
}[];
|
||||
|
||||
/**
|
||||
* URLs blocked by robots.txt
|
||||
*/
|
||||
robotsBlocked: string[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||
@ -621,6 +641,29 @@ export default class FirecrawlApp {
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns information about crawl errors.
|
||||
* @param id - The ID of the crawl operation.
|
||||
* @returns Information about crawl errors.
|
||||
*/
|
||||
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.deleteRequest(
|
||||
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
} else {
|
||||
this.handleError(response, "check crawl errors");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new FirecrawlError(error.message, 500);
|
||||
}
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancels a crawl job using the Firecrawl API.
|
||||
* @param id - The ID of the crawl operation.
|
||||
@ -883,6 +926,29 @@ export default class FirecrawlApp {
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns information about batch scrape errors.
|
||||
* @param id - The ID of the batch scrape operation.
|
||||
* @returns Information about batch scrape errors.
|
||||
*/
|
||||
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.deleteRequest(
|
||||
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
} else {
|
||||
this.handleError(response, "check batch scrape errors");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new FirecrawlError(error.message, 500);
|
||||
}
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts information from URLs using the Firecrawl API.
|
||||
* Currently in Beta. Expect breaking changes on future minor versions.
|
||||
|
@ -309,6 +309,26 @@ class FirecrawlApp:
|
||||
else:
|
||||
self._handle_error(response, 'check crawl status')
|
||||
|
||||
def check_crawl_errors(self, id: str) -> Dict[str, Any]:
    """
    Returns information about crawl errors.

    Args:
        id (str): The ID of the crawl job.

    Returns:
        Dict[str, Any]: Information about crawl errors, as returned by the
        `/v1/crawl/{id}/errors` endpoint (scrapes that errored out plus any
        URLs blocked by robots.txt).

    Raises:
        Exception: If the response body is not valid JSON, or if the request
        fails (raised via `_handle_error`).
    """
    headers = self._prepare_headers()
    response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
    if response.status_code == 200:
        try:
            return response.json()
        # Catch only JSON-decoding failures (ValueError covers both
        # json.JSONDecodeError and requests' JSONDecodeError). The previous
        # bare `except:` also swallowed KeyboardInterrupt/SystemExit, and
        # the f-string prefix was pointless (no placeholders).
        except ValueError as e:
            raise Exception('Failed to parse Firecrawl response as JSON.') from e
    else:
        self._handle_error(response, "check crawl errors")
|
||||
|
||||
def cancel_crawl(self, id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Cancel an asynchronous crawl job using the Firecrawl API.
|
||||
@ -546,6 +566,25 @@ class FirecrawlApp:
|
||||
else:
|
||||
self._handle_error(response, 'check batch scrape status')
|
||||
|
||||
def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
    """
    Returns information about batch scrape errors.

    Args:
        id (str): The ID of the batch scrape job.

    Returns:
        Dict[str, Any]: Information about batch scrape errors, as returned by
        the `/v1/batch/scrape/{id}/errors` endpoint (scrapes that errored out
        plus any URLs blocked by robots.txt).

    Raises:
        Exception: If the response body is not valid JSON, or if the request
        fails (raised via `_handle_error`).
    """
    headers = self._prepare_headers()
    response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
    if response.status_code == 200:
        try:
            return response.json()
        # Catch only JSON-decoding failures (ValueError covers both
        # json.JSONDecodeError and requests' JSONDecodeError). The previous
        # bare `except:` also swallowed KeyboardInterrupt/SystemExit, and
        # the f-string prefix was pointless (no placeholders).
        except ValueError as e:
            raise Exception('Failed to parse Firecrawl response as JSON.') from e
    else:
        self._handle_error(response, "check batch scrape errors")
|
||||
|
||||
def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user