v1 support for crawl/monitor status

This commit is contained in:
rafaelsideguide 2024-08-20 10:37:24 -03:00
parent 7727302ef1
commit fa89d2e535
2 changed files with 20 additions and 15 deletions

View File

@ -132,16 +132,14 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("creditsUsed"); expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0); expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt"); expect(response).toHaveProperty("expiresAt");
expect(response.expiresAt).toBeGreaterThan(Date.now()); expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status"); expect(response).toHaveProperty("status");
expect(response.status).toBe("completed"); expect(response.status).toBe("completed");
expect(response).toHaveProperty("next"); expect(response).not.toHaveProperty("next"); // wait until done
expect(response.next).toBeDefined();
expect(response.data?.length).toBeGreaterThan(0); expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown"); expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_"); expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0 expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty("html"); expect(response.data?.[0]).not.toHaveProperty("html");
expect(response.data?.[0]).not.toHaveProperty("rawHtml"); expect(response.data?.[0]).not.toHaveProperty("rawHtml");
expect(response.data?.[0]).not.toHaveProperty("screenshot"); expect(response.data?.[0]).not.toHaveProperty("screenshot");
@ -156,7 +154,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data?.[0].metadata).toHaveProperty("error"); expect(response.data?.[0].metadata).toHaveProperty("error");
}, 60000); // 60 seconds timeout }, 60000); // 60 seconds timeout
test.concurrent('should return successful response for crawl and wait for completion', async () => { test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://roastmywebsite.ai', { const response = await app.crawlUrl('https://roastmywebsite.ai', {
crawlerOptions: { crawlerOptions: {
@ -184,16 +182,14 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("creditsUsed"); expect(response).toHaveProperty("creditsUsed");
expect(response.creditsUsed).toBeGreaterThan(0); expect(response.creditsUsed).toBeGreaterThan(0);
expect(response).toHaveProperty("expiresAt"); expect(response).toHaveProperty("expiresAt");
expect(response.expiresAt).toBeGreaterThan(Date.now()); expect(new Date(response.expiresAt).getTime()).toBeGreaterThan(Date.now());
expect(response).toHaveProperty("status"); expect(response).toHaveProperty("status");
expect(response.status).toBe("completed"); expect(response.status).toBe("completed");
expect(response).toHaveProperty("next"); expect(response).not.toHaveProperty("next");
expect(response.next).toContain("/v1/crawl/");
expect(response.data?.length).toBeGreaterThan(0); expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown"); expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_"); expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0 expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).toHaveProperty("html"); expect(response.data?.[0]).toHaveProperty("html");
expect(response.data?.[0].html).toContain("<h1"); expect(response.data?.[0].html).toContain("<h1");
expect(response.data?.[0]).toHaveProperty("rawHtml"); expect(response.data?.[0]).toHaveProperty("rawHtml");

View File

@ -458,9 +458,11 @@ export default class FirecrawlApp {
headers headers
); );
if (response.status === 200) { if (response.status === 200) {
const jobId: string = response.data.jobId; const jobId: string = this.version == 'v0' ? response.data.jobId : response.data.id;
let checkUrl: string | undefined = undefined;
if (waitUntilDone) { if (waitUntilDone) {
return this.monitorJobStatus(jobId, headers, pollInterval); if (this.version == 'v1') { checkUrl = response.data.url }
return this.monitorJobStatus(jobId, headers, pollInterval, checkUrl);
} else { } else {
return { success: true, jobId }; return { success: true, jobId };
} }
@ -610,23 +612,30 @@ export default class FirecrawlApp {
async monitorJobStatus( async monitorJobStatus(
jobId: string, jobId: string,
headers: AxiosRequestHeaders, headers: AxiosRequestHeaders,
checkInterval: number checkInterval: number,
checkUrl?: string
): Promise<any> { ): Promise<any> {
let apiUrl: string = '';
while (true) { while (true) {
if (this.version == 'v1') {
apiUrl = checkUrl ?? this.apiUrl + `/v1/crawl/${jobId}`;
} else if (this.version == 'v0') {
apiUrl = checkUrl ?? this.apiUrl + `/v0/crawl/status/${jobId}`;
}
const statusResponse: AxiosResponse = await this.getRequest( const statusResponse: AxiosResponse = await this.getRequest(
this.apiUrl + `/v0/crawl/status/${jobId}`, apiUrl,
headers headers
); );
if (statusResponse.status === 200) { if (statusResponse.status === 200) {
const statusData = statusResponse.data; const statusData = statusResponse.data;
if (statusData.status === "completed") { if (statusData.status === "completed") {
if ("data" in statusData) { if ("data" in statusData) {
return statusData.data; return this.version == 'v0' ? statusData.data : statusData;
} else { } else {
throw new Error("Crawl job completed but no data was returned"); throw new Error("Crawl job completed but no data was returned");
} }
} else if ( } else if (
["active", "paused", "pending", "queued"].includes(statusData.status) ["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
) { ) {
if (checkInterval < 2) { if (checkInterval < 2) {
checkInterval = 2; checkInterval = 2;