mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 03:35:56 +08:00
feat(js-sdk): paginate next on checkCrawlStatus + better types for CrawlStatusResponse
This commit is contained in:
parent
4ebc35c9dd
commit
e19f7a102e
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.2.2",
|
"version": "1.2.3",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/cjs/index.js",
|
"main": "build/cjs/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
|
@ -131,15 +131,14 @@ export interface CrawlResponse {
|
|||||||
*/
|
*/
|
||||||
export interface CrawlStatusResponse {
|
export interface CrawlStatusResponse {
|
||||||
success: true;
|
success: true;
|
||||||
total: number;
|
status: "scraping" | "completed" | "failed" | "cancelled";
|
||||||
completed: number;
|
completed: number;
|
||||||
|
total: number;
|
||||||
creditsUsed: number;
|
creditsUsed: number;
|
||||||
expiresAt: Date;
|
expiresAt: Date;
|
||||||
status: "scraping" | "completed" | "failed";
|
next?: string;
|
||||||
next: string;
|
data: FirecrawlDocument[];
|
||||||
data?: FirecrawlDocument[];
|
};
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parameters for mapping operations.
|
* Parameters for mapping operations.
|
||||||
@ -329,9 +328,10 @@ export default class FirecrawlApp {
|
|||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param id - The ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
||||||
* @returns The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||||
if (!id) {
|
if (!id) {
|
||||||
throw new Error("No crawl ID provided");
|
throw new Error("No crawl ID provided");
|
||||||
}
|
}
|
||||||
@ -342,17 +342,29 @@ export default class FirecrawlApp {
|
|||||||
`${this.apiUrl}/v1/crawl/${id}`,
|
`${this.apiUrl}/v1/crawl/${id}`,
|
||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
if (response.status === 200) {
|
if (response.status === 200 && getAllData) {
|
||||||
|
let allData = response.data.data;
|
||||||
|
if (response.data.status === "completed") {
|
||||||
|
let statusData = response.data
|
||||||
|
if ("data" in statusData) {
|
||||||
|
let data = statusData.data;
|
||||||
|
while ('next' in statusData) {
|
||||||
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
||||||
|
data = data.concat(statusData.data);
|
||||||
|
}
|
||||||
|
allData = data;
|
||||||
|
}
|
||||||
|
}
|
||||||
return ({
|
return ({
|
||||||
success: true,
|
success: response.data.success,
|
||||||
status: response.data.status,
|
status: response.data.status,
|
||||||
total: response.data.total,
|
total: response.data.total,
|
||||||
completed: response.data.completed,
|
completed: response.data.completed,
|
||||||
creditsUsed: response.data.creditsUsed,
|
creditsUsed: response.data.creditsUsed,
|
||||||
expiresAt: new Date(response.data.expiresAt),
|
expiresAt: new Date(response.data.expiresAt),
|
||||||
next: response.data.next,
|
next: response.data.next,
|
||||||
data: response.data.data,
|
data: allData,
|
||||||
error: response.data.error
|
error: response.data.error,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
@ -452,7 +464,7 @@ export default class FirecrawlApp {
|
|||||||
id: string,
|
id: string,
|
||||||
headers: AxiosRequestHeaders,
|
headers: AxiosRequestHeaders,
|
||||||
checkInterval: number
|
checkInterval: number
|
||||||
): Promise<CrawlStatusResponse> {
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||||
while (true) {
|
while (true) {
|
||||||
let statusResponse: AxiosResponse = await this.getRequest(
|
let statusResponse: AxiosResponse = await this.getRequest(
|
||||||
`${this.apiUrl}/v1/crawl/${id}`,
|
`${this.apiUrl}/v1/crawl/${id}`,
|
||||||
@ -460,20 +472,20 @@ export default class FirecrawlApp {
|
|||||||
);
|
);
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
let statusData = statusResponse.data;
|
let statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
let data = statusData.data;
|
let data = statusData.data;
|
||||||
while ('next' in statusData) {
|
while ('next' in statusData) {
|
||||||
statusResponse = await this.getRequest(statusData.next, headers);
|
statusResponse = await this.getRequest(statusData.next, headers);
|
||||||
statusData = statusResponse.data;
|
statusData = statusResponse.data;
|
||||||
data = data.concat(statusData.data);
|
data = data.concat(statusData.data);
|
||||||
|
}
|
||||||
|
statusData.data = data;
|
||||||
|
return statusData;
|
||||||
|
} else {
|
||||||
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
}
|
}
|
||||||
statusData.data = data;
|
} else if (
|
||||||
return statusData;
|
|
||||||
} else {
|
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
|
||||||
}
|
|
||||||
} else if (
|
|
||||||
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
||||||
) {
|
) {
|
||||||
checkInterval = Math.max(checkInterval, 2);
|
checkInterval = Math.max(checkInterval, 2);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user