mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-10 16:59:02 +08:00
feat(js-sdk): paginate next on checkCrawlStatus + better types for CSR
This commit is contained in:
parent
4ebc35c9dd
commit
e19f7a102e
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.2.2",
|
||||
"version": "1.2.3",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/cjs/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
@ -131,15 +131,14 @@ export interface CrawlResponse {
|
||||
*/
|
||||
export interface CrawlStatusResponse {
|
||||
success: true;
|
||||
total: number;
|
||||
status: "scraping" | "completed" | "failed" | "cancelled";
|
||||
completed: number;
|
||||
total: number;
|
||||
creditsUsed: number;
|
||||
expiresAt: Date;
|
||||
status: "scraping" | "completed" | "failed";
|
||||
next: string;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
next?: string;
|
||||
data: FirecrawlDocument[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Parameters for mapping operations.
|
||||
@ -329,9 +328,10 @@ export default class FirecrawlApp {
|
||||
/**
|
||||
* Checks the status of a crawl job using the Firecrawl API.
|
||||
* @param id - The ID of the crawl operation.
|
||||
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
||||
* @returns The response containing the job status.
|
||||
*/
|
||||
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
if (!id) {
|
||||
throw new Error("No crawl ID provided");
|
||||
}
|
||||
@ -342,17 +342,29 @@ export default class FirecrawlApp {
|
||||
`${this.apiUrl}/v1/crawl/${id}`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
if (response.status === 200 && getAllData) {
|
||||
let allData = response.data.data;
|
||||
if (response.data.status === "completed") {
|
||||
let statusData = response.data
|
||||
if ("data" in statusData) {
|
||||
let data = statusData.data;
|
||||
while ('next' in statusData) {
|
||||
statusData = (await this.getRequest(statusData.next, headers)).data;
|
||||
data = data.concat(statusData.data);
|
||||
}
|
||||
allData = data;
|
||||
}
|
||||
}
|
||||
return ({
|
||||
success: true,
|
||||
success: response.data.success,
|
||||
status: response.data.status,
|
||||
total: response.data.total,
|
||||
completed: response.data.completed,
|
||||
creditsUsed: response.data.creditsUsed,
|
||||
expiresAt: new Date(response.data.expiresAt),
|
||||
next: response.data.next,
|
||||
data: response.data.data,
|
||||
error: response.data.error
|
||||
data: allData,
|
||||
error: response.data.error,
|
||||
})
|
||||
} else {
|
||||
this.handleError(response, "check crawl status");
|
||||
@ -452,7 +464,7 @@ export default class FirecrawlApp {
|
||||
id: string,
|
||||
headers: AxiosRequestHeaders,
|
||||
checkInterval: number
|
||||
): Promise<CrawlStatusResponse> {
|
||||
): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
while (true) {
|
||||
let statusResponse: AxiosResponse = await this.getRequest(
|
||||
`${this.apiUrl}/v1/crawl/${id}`,
|
||||
@ -460,20 +472,20 @@ export default class FirecrawlApp {
|
||||
);
|
||||
if (statusResponse.status === 200) {
|
||||
let statusData = statusResponse.data;
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
let data = statusData.data;
|
||||
while ('next' in statusData) {
|
||||
statusResponse = await this.getRequest(statusData.next, headers);
|
||||
statusData = statusResponse.data;
|
||||
data = data.concat(statusData.data);
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
let data = statusData.data;
|
||||
while ('next' in statusData) {
|
||||
statusResponse = await this.getRequest(statusData.next, headers);
|
||||
statusData = statusResponse.data;
|
||||
data = data.concat(statusData.data);
|
||||
}
|
||||
statusData.data = data;
|
||||
return statusData;
|
||||
} else {
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
statusData.data = data;
|
||||
return statusData;
|
||||
} else {
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
} else if (
|
||||
} else if (
|
||||
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
||||
) {
|
||||
checkInterval = Math.max(checkInterval, 2);
|
||||
|
Loading…
x
Reference in New Issue
Block a user