Add v1 support for crawl job creation and status monitoring

This commit is contained in:
rafaelsideguide 2024-08-20 10:37:24 -03:00
parent 7727302ef1
commit fa89d2e535
2 changed files with 20 additions and 15 deletions

View File

@ -132,16 +132,14 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt");
expect(response.expiresAt).toBeGreaterThan(Date.now());
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).toHaveProperty("next");
expect(response.next).toBeDefined();
expect(response).not.toHaveProperty("next"); // wait until done
expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty("html");
expect(response.data?.[0]).not.toHaveProperty("rawHtml");
expect(response.data?.[0]).not.toHaveProperty("screenshot");
@ -156,7 +154,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data?.[0].metadata).toHaveProperty("error");
}, 60000); // 60 seconds timeout
test.concurrent('should return successful response for crawl and wait for completion', async () => {
test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://roastmywebsite.ai', {
crawlerOptions: {
@ -184,16 +182,14 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt");
expect(response.expiresAt).toBeGreaterThan(Date.now());
expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).toHaveProperty("next");
expect(response.next).toContain("/v1/crawl/");
expect(response).not.toHaveProperty("next");
expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).toHaveProperty("html");
expect(response.data?.[0].html).toContain("<h1");
expect(response.data?.[0]).toHaveProperty("rawHtml");

View File

@ -458,9 +458,11 @@ export default class FirecrawlApp {
headers
);
if (response.status === 200) {
const jobId: string = response.data.jobId;
const jobId: string = this.version == 'v0' ? response.data.jobId : response.data.id;
let checkUrl: string | undefined = undefined;
if (waitUntilDone) {
return this.monitorJobStatus(jobId, headers, pollInterval);
if (this.version == 'v1') { checkUrl = response.data.url }
return this.monitorJobStatus(jobId, headers, pollInterval, checkUrl);
} else {
return { success: true, jobId };
}
@ -610,23 +612,30 @@ export default class FirecrawlApp {
async monitorJobStatus(
jobId: string,
headers: AxiosRequestHeaders,
checkInterval: number
checkInterval: number,
checkUrl?: string
): Promise<any> {
let apiUrl: string = '';
while (true) {
if (this.version == 'v1') {
apiUrl = checkUrl ?? this.apiUrl + `/v1/crawl/${jobId}`;
} else if (this.version == 'v0') {
apiUrl = checkUrl ?? this.apiUrl + `/v0/crawl/status/${jobId}`;
}
const statusResponse: AxiosResponse = await this.getRequest(
this.apiUrl + `/v0/crawl/status/${jobId}`,
apiUrl,
headers
);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
return statusData.data;
return this.version == 'v0' ? statusData.data : statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
} else if (
["active", "paused", "pending", "queued"].includes(statusData.status)
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
) {
if (checkInterval < 2) {
checkInterval = 2;