mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:36:02 +08:00
wip
This commit is contained in:
parent
6cdf4c68ec
commit
d599d31e63
@ -1,4 +1,4 @@
|
|||||||
import FirecrawlApp from '../../index';
|
import FirecrawlApp, { CrawlResponseV0, FirecrawlDocumentV0, JobStatusResponseV0, ScrapeResponseV0, SearchResponseV0 } from '../../index';
|
||||||
import { v4 as uuidv4 } from 'uuid';
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
import dotenv from 'dotenv';
|
import dotenv from 'dotenv';
|
||||||
import { describe, test, expect } from '@jest/globals';
|
import { describe, test, expect } from '@jest/globals';
|
||||||
@ -11,31 +11,31 @@ const API_URL = "http://127.0.0.1:3002";
|
|||||||
describe('FirecrawlApp E2E Tests', () => {
|
describe('FirecrawlApp E2E Tests', () => {
|
||||||
test.concurrent('should throw error for no API key', async () => {
|
test.concurrent('should throw error for no API key', async () => {
|
||||||
expect(() => {
|
expect(() => {
|
||||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL, version: "v0" });
|
||||||
}).toThrow("No API key provided");
|
}).toThrow("No API key provided");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
||||||
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should return successful response with valid preview token', async () => {
|
test.concurrent('should return successful response with valid preview token', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid scrape', async () => {
|
test.concurrent('should return successful response for valid scrape', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
expect(response.data).toHaveProperty('markdown');
|
expect(response.data).toHaveProperty('markdown');
|
||||||
@ -44,8 +44,8 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
expect(response.data?.markdown).toContain("_Roast_");
|
expect(response.data?.markdown).toContain("_Roast_");
|
||||||
@ -53,41 +53,41 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
||||||
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30) as CrawlResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response[0].content).toContain("_Roast_");
|
expect(response[0].content).toContain("_Roast_");
|
||||||
}, 60000); // 60 seconds timeout
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should handle idempotency key for crawl', async () => {
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const uniqueIdempotencyKey = uuidv4();
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
@ -95,12 +95,12 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should check crawl status', async () => {
|
test.concurrent('should check crawl status', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
const response: any = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false) as JobStatusResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
let statusResponse: any = await app.checkCrawlStatus(response.jobId);
|
||||||
const maxChecks = 15;
|
const maxChecks = 15;
|
||||||
let checks = 0;
|
let checks = 0;
|
||||||
|
|
||||||
@ -108,7 +108,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
expect(statusResponse.partial_data).not.toBeNull();
|
expect(statusResponse.partial_data).not.toBeNull();
|
||||||
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
||||||
statusResponse = await app.checkCrawlStatus(response.jobId);
|
statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlResponseV0;
|
||||||
checks++;
|
checks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -121,20 +121,20 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 35000); // 35 seconds timeout
|
}, 35000); // 35 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for search', async () => {
|
test.concurrent('should return successful response for search', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.search("test query");
|
const response = await app.search("test query") as SearchResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response?.data?.[0]?.content).toBeDefined();
|
expect(response?.data?.[0]?.content).toBeDefined();
|
||||||
expect(response?.data?.length).toBeGreaterThan(2);
|
expect(response?.data?.length).toBeGreaterThan(2);
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on search', async () => {
|
test.concurrent('should throw error for invalid API key on search', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
||||||
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should perform LLM extraction', async () => {
|
test.concurrent('should perform LLM extraction', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
||||||
const response = await app.scrapeUrl("https://mendable.ai", {
|
const response = await app.scrapeUrl("https://mendable.ai", {
|
||||||
extractorOptions: {
|
extractorOptions: {
|
||||||
mode: 'llm-extraction',
|
mode: 'llm-extraction',
|
||||||
@ -149,7 +149,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
required: ['company_mission', 'supports_sso', 'is_open_source']
|
required: ['company_mission', 'supports_sso', 'is_open_source']
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
}) as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.llm_extraction).toBeDefined();
|
expect(response.data?.llm_extraction).toBeDefined();
|
||||||
const llmExtraction = response.data?.llm_extraction;
|
const llmExtraction = response.data?.llm_extraction;
|
||||||
|
@ -0,0 +1,122 @@
|
|||||||
|
import FirecrawlApp, { CrawlResponse, JobStatusResponse, ScrapeResponse } from '../../../index';
|
||||||
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import { describe, test, expect } from '@jest/globals';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
||||||
|
const API_URL = "http://127.0.0.1:3002";
|
||||||
|
|
||||||
|
describe('FirecrawlApp E2E Tests', () => {
|
||||||
|
test.concurrent('should throw error for no API key', async () => {
|
||||||
|
expect(() => {
|
||||||
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||||
|
}).toThrow("No API key provided");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should return successful response with valid preview token', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for valid scrape', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
|
expect(response.data).toHaveProperty('markdown');
|
||||||
|
expect(response.data).toHaveProperty('metadata');
|
||||||
|
expect(response.data).not.toHaveProperty('html');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
|
expect(response.data?.markdown).toContain("_Roast_");
|
||||||
|
expect(response.data?.html).toContain("<h1");
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30) as JobStatusResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.[0].content).toContain("_Roast_");
|
||||||
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
|
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||||
|
});
|
||||||
|
|
||||||
|
test.concurrent('should check crawl status', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response: any = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false) as JobStatusResponse;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
|
let statusResponse: any = await app.checkCrawlStatus(response.jobId);
|
||||||
|
const maxChecks = 15;
|
||||||
|
let checks = 0;
|
||||||
|
|
||||||
|
while (statusResponse.status === 'active' && checks < maxChecks) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
|
expect(statusResponse.partial_data).not.toBeNull();
|
||||||
|
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
||||||
|
statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlResponse;
|
||||||
|
checks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(statusResponse).not.toBeNull();
|
||||||
|
expect(statusResponse.success).toBe(true);
|
||||||
|
expect(statusResponse.status).toBe('completed');
|
||||||
|
expect(statusResponse.total).toEqual(statusResponse.current);
|
||||||
|
expect(statusResponse.current_step).not.toBeNull();
|
||||||
|
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
||||||
|
}, 35000); // 35 seconds timeout
|
||||||
|
});
|
Loading…
x
Reference in New Issue
Block a user