diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index 9042d02e..cf6181fe 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -30,40 +30,84 @@ describe('FirecrawlApp E2E Tests', () => { const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); + expect(response.data?.markdown).toContain("_Roast_"); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); + expect(response.data).not.toHaveProperty('content'); // v0 + expect(response.data).not.toHaveProperty('html'); + expect(response.data).not.toHaveProperty('rawHtml'); + expect(response.data).not.toHaveProperty('screenshot'); + expect(response.data).not.toHaveProperty('links'); + expect(response.data).toHaveProperty('markdown'); expect(response.data).toHaveProperty('metadata'); - expect(response.data).not.toHaveProperty('html'); }, 30000); // 30 seconds timeout test.concurrent('should return successful response with valid API key and include HTML', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponse; + const response = await app.scrapeUrl( + 'https://roastmywebsite.ai', { + formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], + headers: { "x-key": "test" }, + includeTags: ['h1'], + excludeTags: ['h2'], + onlyMainContent: true, + timeout: 30000, + waitFor: 1000 + }) as ScrapeResponse; expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); + expect(response.data).not.toHaveProperty('content'); // v0 expect(response.data?.markdown).toContain("_Roast_"); expect(response.data?.html).toContain(" { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse; expect(response).not.toBeNull(); - expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + expect(response.data?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse; expect(response).not.toBeNull(); - expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + expect(response.data?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout test.concurrent('should throw error for invalid API key on crawl', async () => { @@ -79,19 +123,102 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should return successful response for crawl and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30) as JobStatusResponse; + const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as JobStatusResponse; expect(response).not.toBeNull(); - expect(response.data?.[0].content).toContain("_Roast_"); + + expect(response).toHaveProperty("totalCount"); + expect(response.totalCount).toBeGreaterThan(0); + expect(response).toHaveProperty("creditsUsed"); + expect(response.creditsUsed).toBeGreaterThan(0); + expect(response).toHaveProperty("expiresAt"); + expect(response.expiresAt).toBeGreaterThan(Date.now()); + expect(response).toHaveProperty("status"); + expect(response.status).toBe("completed"); + expect(response).toHaveProperty("next"); + expect(response.next).toBeDefined(); + expect(response.data?.length).toBeGreaterThan(0); + expect(response.data?.[0]).toHaveProperty("markdown"); + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).not.toHaveProperty('content'); // v0 + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).not.toHaveProperty("html"); + expect(response.data?.[0]).not.toHaveProperty("rawHtml"); + expect(response.data?.[0]).not.toHaveProperty("screenshot"); + expect(response.data?.[0]).not.toHaveProperty("links"); + + expect(response.data?.[0]).toHaveProperty("metadata"); + expect(response.data?.[0].metadata).toHaveProperty("title"); + expect(response.data?.[0].metadata).toHaveProperty("description"); + expect(response.data?.[0].metadata).toHaveProperty("language"); + expect(response.data?.[0].metadata).toHaveProperty("sourceURL"); + expect(response.data?.[0].metadata).toHaveProperty("statusCode"); + expect(response.data?.[0].metadata).toHaveProperty("error"); + }, 60000); // 60 seconds timeout + + test.concurrent('should return successful response for crawl and wait for completion', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://roastmywebsite.ai', { + crawlerOptions: { + excludePaths: ['blog/*'], + includePaths: ['/'], + maxDepth: 2, + ignoreSitemap: true, + limit: 10, + allowBackwardLinks: true, + allowExternalLinks: true, + }, + pageOptions: { + formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], + headers: { "x-key": "test" }, + includeTags: ['h1'], + excludeTags: ['h2'], + onlyMainContent: true, + timeout: 30000, + waitFor: 1000 + } + }, true, 30) as JobStatusResponse; + expect(response).not.toBeNull(); + expect(response).toHaveProperty("totalCount"); + expect(response.totalCount).toBeGreaterThan(0); + expect(response).toHaveProperty("creditsUsed"); + expect(response.creditsUsed).toBeGreaterThan(0); + expect(response).toHaveProperty("expiresAt"); + expect(response.expiresAt).toBeGreaterThan(Date.now()); + expect(response).toHaveProperty("status"); + expect(response.status).toBe("completed"); + expect(response).toHaveProperty("next"); + expect(response.next).toContain("/v1/crawl/"); + expect(response.data?.length).toBeGreaterThan(0); + expect(response.data?.[0]).toHaveProperty("markdown"); + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).not.toHaveProperty('content'); // v0 + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).toHaveProperty("html"); + expect(response.data?.[0].html).toContain(" { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const uniqueIdempotencyKey = uuidv4(); - const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponse; + const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse; expect(response).not.toBeNull(); expect(response.jobId).toBeDefined(); - await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); + await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); }); test.concurrent('should check crawl status', async () => { @@ -104,19 +231,56 @@ describe('FirecrawlApp E2E Tests', () => { const maxChecks = 15; let checks = 0; - while (statusResponse.status === 'active' && checks < maxChecks) { + while (statusResponse.status === 'scraping' && checks < maxChecks) { await new Promise(resolve => setTimeout(resolve, 1000)); - expect(statusResponse.partial_data).not.toBeNull(); - expect(statusResponse.current).toBeGreaterThanOrEqual(1); + expect(statusResponse).not.toHaveProperty("partial_data"); // v0 + expect(statusResponse).not.toHaveProperty("current"); // v0 + expect(statusResponse).toHaveProperty("data"); + expect(statusResponse).toHaveProperty("totalCount"); + expect(statusResponse).toHaveProperty("creditsUsed"); + expect(statusResponse).toHaveProperty("expiresAt"); + expect(statusResponse).toHaveProperty("status"); + expect(statusResponse).toHaveProperty("next"); + expect(statusResponse.totalCount).toBeGreaterThan(0); + expect(statusResponse.creditsUsed).toBeGreaterThan(0); + expect(statusResponse.expiresAt).toBeGreaterThan(Date.now()); + expect(statusResponse.status).toBe("scraping"); + expect(statusResponse.next).toContain("/v1/crawl/"); statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlResponse; checks++; } - expect(statusResponse).not.toBeNull(); - expect(statusResponse.success).toBe(true); - expect(statusResponse.status).toBe('completed'); - expect(statusResponse.total).toEqual(statusResponse.current); - expect(statusResponse.current_step).not.toBeNull(); - expect(statusResponse?.data?.length).toBeGreaterThan(0); + expect(response).not.toBeNull(); + expect(response).toHaveProperty("totalCount"); + expect(response.totalCount).toBeGreaterThan(0); + expect(response).toHaveProperty("creditsUsed"); + expect(response.creditsUsed).toBeGreaterThan(0); + expect(response).toHaveProperty("expiresAt"); + expect(response.expiresAt).toBeGreaterThan(Date.now()); + expect(response).toHaveProperty("status"); + expect(response.status).toBe("completed"); + expect(response).toHaveProperty("next"); + expect(response.next).toContain("/v1/crawl/"); + expect(response.data?.length).toBeGreaterThan(0); + expect(response.data?.[0]).toHaveProperty("markdown"); + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).not.toHaveProperty('content'); // v0 + expect(response.data?.[0].markdown).toContain("_Roast_"); + expect(response.data?.[0]).toHaveProperty("html"); + expect(response.data?.[0].html).toContain("