mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 16:59:05 +08:00

feat(sdk): check crawl/batch scrape errors

Adds a CrawlErrorsResponse type and checkCrawlErrors/checkBatchScrapeErrors methods to the Node SDK, plus matching check_crawl_errors/check_batch_scrape_errors methods to the Python SDK.

parent dbc6d07871
commit 146dc47954
@@ -314,6 +314,26 @@ export interface SearchResponse {
   error?: string;
 }
+
+/**
+ * Response interface for crawl/batch scrape error monitoring.
+ */
+export interface CrawlErrorsResponse {
+  /**
+   * Scrapes that errored out + error details
+   */
+  errors: {
+    id: string,
+    timestamp?: string,
+    url: string,
+    error: string,
+  }[];
+
+  /**
+   * URLs blocked by robots.txt
+   */
+  robotsBlocked: string[];
+};
 
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
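For reference, a payload conforming to the new interface might look like the sketch below. The values are illustrative, not from the commit, and the import assumes the type is exported from the SDK's entry point:

```ts
import type { CrawlErrorsResponse } from "@mendable/firecrawl-js";

// Illustrative payload matching the interface above (values are made up).
const example: CrawlErrorsResponse = {
  errors: [
    {
      id: "scrape_abc123",               // hypothetical scrape ID
      timestamp: "2025-01-01T00:00:00Z", // optional, per the interface
      url: "https://example.com/page",
      error: "Request timed out",        // hypothetical error detail
    },
  ],
  robotsBlocked: ["https://example.com/admin"],
};

console.log(`${example.errors.length} failed scrape(s)`);
```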
@@ -621,6 +641,29 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
+
+  /**
+   * Returns information about crawl errors.
+   * @param id - The ID of the crawl operation.
+   * @returns Information about crawl errors.
+   */
+  async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.deleteRequest(
+        `${this.apiUrl}/v1/crawl/${id}/errors`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "check crawl errors");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
 
   /**
    * Cancels a crawl job using the Firecrawl API.
   * @param id - The ID of the crawl operation.
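A minimal usage sketch for the new method, assuming a valid API key in FIRECRAWL_API_KEY and a crawl ID from an earlier crawl (both placeholders). The `"errors" in result` check is one way to narrow the CrawlErrorsResponse | ErrorResponse union:

```ts
import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

// "crawl-id" is a placeholder for a real crawl ID.
// Note: per the diff, the method throws FirecrawlError on transport failures.
const result = await app.checkCrawlErrors("crawl-id");

if ("errors" in result) {
  // CrawlErrorsResponse branch
  for (const e of result.errors) {
    console.log(`[${e.id}] ${e.url}: ${e.error}`);
  }
  console.log("Blocked by robots.txt:", result.robotsBlocked);
} else {
  // ErrorResponse branch
  console.error("Request failed:", result.error);
}
```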
@@ -883,6 +926,29 @@ export default class FirecrawlApp {
     return { success: false, error: "Internal server error." };
   }
+
+  /**
+   * Returns information about batch scrape errors.
+   * @param id - The ID of the batch scrape operation.
+   * @returns Information about batch scrape errors.
+   */
+  async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.deleteRequest(
+        `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "check batch scrape errors");
+      }
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500);
+    }
+    return { success: false, error: "Internal server error." };
+  }
 
   /**
    * Extracts information from URLs using the Firecrawl API.
    * Currently in Beta. Expect breaking changes on future minor versions.
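The batch variant follows the same pattern; a short sketch with a placeholder ID and the same union narrowing:

```ts
import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

// "batch-id" is a placeholder for an ID returned when starting a batch scrape.
const result = await app.checkBatchScrapeErrors("batch-id");

if ("errors" in result) {
  console.log(`${result.errors.length} scrape(s) in the batch failed`);
} else {
  console.error("Request failed:", result.error);
}
```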
@@ -309,6 +309,26 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check crawl status')
+
+    def check_crawl_errors(self, id: str) -> Dict[str, Any]:
+        """
+        Returns information about crawl errors.
+
+        Args:
+            id (str): The ID of the crawl job.
+
+        Returns:
+            Dict[str, Any]: Information about crawl errors.
+        """
+        headers = self._prepare_headers()
+        response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
+        if response.status_code == 200:
+            try:
+                return response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, "check crawl errors")
 
     def cancel_crawl(self, id: str) -> Dict[str, Any]:
         """
         Cancel an asynchronous crawl job using the Firecrawl API.
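A usage sketch for the Python side, with a placeholder API key and crawl ID. The dict keys mirror the CrawlErrorsResponse interface above, so robotsBlocked stays camelCase:

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# "crawl-id" is a placeholder for a real crawl job ID.
errors = app.check_crawl_errors("crawl-id")

for e in errors.get("errors", []):
    print(f"[{e['id']}] {e['url']}: {e['error']}")
print("Blocked by robots.txt:", errors.get("robotsBlocked", []))
```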
@@ -546,6 +566,25 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check batch scrape status')
+
+    def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
+        """
+        Returns information about batch scrape errors.
+
+        Args:
+            id (str): The ID of the batch scrape job.
+
+        Returns:
+            Dict[str, Any]: Information about batch scrape errors.
+        """
+        headers = self._prepare_headers()
+        response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
+        if response.status_code == 200:
+            try:
+                return response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+        else:
+            self._handle_error(response, "check batch scrape errors")
 
     def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
         """
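And the matching batch scrape sketch, again with a placeholder ID:

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

# "batch-id" is a placeholder for a real batch scrape job ID.
errors = app.check_batch_scrape_errors("batch-id")
print(f"{len(errors.get('errors', []))} scrape(s) in the batch failed")
```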