feat(js-sdk): follow `next` pagination in checkCrawlStatus + better types for CrawlStatusResponse

This commit is contained in:
Gergő Móricz 2024-09-10 19:29:38 +02:00
parent 4ebc35c9dd
commit e19f7a102e
2 changed files with 38 additions and 26 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.2.2",
"version": "1.2.3",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/cjs/index.js",
"types": "types/index.d.ts",

View File

@ -131,15 +131,14 @@ export interface CrawlResponse {
*/
export interface CrawlStatusResponse {
success: true;
total: number;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
expiresAt: Date;
status: "scraping" | "completed" | "failed";
next: string;
data?: FirecrawlDocument[];
error?: string;
}
next?: string;
data: FirecrawlDocument[];
};
/**
* Parameters for mapping operations.
@ -329,9 +328,10 @@ export default class FirecrawlApp {
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation.
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
* @returns The response containing the job status.
*/
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
if (!id) {
throw new Error("No crawl ID provided");
}
@ -342,17 +342,29 @@ export default class FirecrawlApp {
`${this.apiUrl}/v1/crawl/${id}`,
headers
);
if (response.status === 200) {
if (response.status === 200 && getAllData) {
let allData = response.data.data;
if (response.data.status === "completed") {
let statusData = response.data
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusData = (await this.getRequest(statusData.next, headers)).data;
data = data.concat(statusData.data);
}
allData = data;
}
}
return ({
success: true,
success: response.data.success,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
data: allData,
error: response.data.error,
})
} else {
this.handleError(response, "check crawl status");
@ -452,7 +464,7 @@ export default class FirecrawlApp {
id: string,
headers: AxiosRequestHeaders,
checkInterval: number
): Promise<CrawlStatusResponse> {
): Promise<CrawlStatusResponse | ErrorResponse> {
while (true) {
let statusResponse: AxiosResponse = await this.getRequest(
`${this.apiUrl}/v1/crawl/${id}`,
@ -460,20 +472,20 @@ export default class FirecrawlApp {
);
if (statusResponse.status === 200) {
let statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusResponse = await this.getRequest(statusData.next, headers);
statusData = statusResponse.data;
data = data.concat(statusData.data);
if (statusData.status === "completed") {
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusResponse = await this.getRequest(statusData.next, headers);
statusData = statusResponse.data;
data = data.concat(statusData.data);
}
statusData.data = data;
return statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
statusData.data = data;
return statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
} else if (
} else if (
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
) {
checkInterval = Math.max(checkInterval, 2);