feat(js-sdk): paginate next on checkCrawlStatus + better types for CSR

This commit is contained in:
Gergő Móricz 2024-09-10 19:29:38 +02:00
parent 4ebc35c9dd
commit e19f7a102e
2 changed files with 38 additions and 26 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.2.2", "version": "1.2.3",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "build/cjs/index.js", "main": "build/cjs/index.js",
"types": "types/index.d.ts", "types": "types/index.d.ts",

View File

@ -131,15 +131,14 @@ export interface CrawlResponse {
*/ */
export interface CrawlStatusResponse { export interface CrawlStatusResponse {
success: true; success: true;
total: number; status: "scraping" | "completed" | "failed" | "cancelled";
completed: number; completed: number;
total: number;
creditsUsed: number; creditsUsed: number;
expiresAt: Date; expiresAt: Date;
status: "scraping" | "completed" | "failed"; next?: string;
next: string; data: FirecrawlDocument[];
data?: FirecrawlDocument[]; };
error?: string;
}
/** /**
* Parameters for mapping operations. * Parameters for mapping operations.
@ -329,9 +328,10 @@ export default class FirecrawlApp {
/** /**
* Checks the status of a crawl job using the Firecrawl API. * Checks the status of a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation. * @param id - The ID of the crawl operation.
* @param getAllData - When `true` and the crawl status is `"completed"`, follow each `next` link and return the documents from all pages combined. (default: `false`)
* @returns The response containing the job status. * @returns The response containing the job status.
*/ */
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> { async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
if (!id) { if (!id) {
throw new Error("No crawl ID provided"); throw new Error("No crawl ID provided");
} }
@ -342,17 +342,29 @@ export default class FirecrawlApp {
`${this.apiUrl}/v1/crawl/${id}`, `${this.apiUrl}/v1/crawl/${id}`,
headers headers
); );
if (response.status === 200) { if (response.status === 200 && getAllData) {
let allData = response.data.data;
if (response.data.status === "completed") {
let statusData = response.data
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusData = (await this.getRequest(statusData.next, headers)).data;
data = data.concat(statusData.data);
}
allData = data;
}
}
return ({ return ({
success: true, success: response.data.success,
status: response.data.status, status: response.data.status,
total: response.data.total, total: response.data.total,
completed: response.data.completed, completed: response.data.completed,
creditsUsed: response.data.creditsUsed, creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt), expiresAt: new Date(response.data.expiresAt),
next: response.data.next, next: response.data.next,
data: response.data.data, data: allData,
error: response.data.error error: response.data.error,
}) })
} else { } else {
this.handleError(response, "check crawl status"); this.handleError(response, "check crawl status");
@ -452,7 +464,7 @@ export default class FirecrawlApp {
id: string, id: string,
headers: AxiosRequestHeaders, headers: AxiosRequestHeaders,
checkInterval: number checkInterval: number
): Promise<CrawlStatusResponse> { ): Promise<CrawlStatusResponse | ErrorResponse> {
while (true) { while (true) {
let statusResponse: AxiosResponse = await this.getRequest( let statusResponse: AxiosResponse = await this.getRequest(
`${this.apiUrl}/v1/crawl/${id}`, `${this.apiUrl}/v1/crawl/${id}`,