From fb8a2c75497c0221abde4b14e3c73a7926317c15 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:03:33 -0400 Subject: [PATCH] fixed screenshot typo and added test for fullpage screenshot --- apps/js-sdk/firecrawl/package-lock.json | 4 +- apps/js-sdk/firecrawl/package.json | 2 +- .../__tests__/v1/e2e_withAuth/index.test.ts | 47 +++++++++++++++++-- apps/js-sdk/firecrawl/src/index.ts | 2 +- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 2dcca44d..e27e259d 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,12 +1,12 @@ { "name": "@mendable/firecrawl-js", - "version": "1.3.0", + "version": "1.4.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "1.3.0", + "version": "1.4.3", "license": "MIT", "dependencies": { "axios": "^1.6.8", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index f6f14fb2..45fbcee9 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.4.2", + "version": "1.4.3", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index 98a52538..dea55846 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -28,14 +28,22 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should return successful response with valid preview token', async () => { const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); - 
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; + const response = await app.scrapeUrl('https://roastmywebsite.ai'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain("_Roast_"); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; + const response = await app.scrapeUrl('https://roastmywebsite.ai'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response).not.toHaveProperty('content'); // v0 expect(response).not.toHaveProperty('html'); @@ -58,7 +66,11 @@ describe('FirecrawlApp E2E Tests', () => { onlyMainContent: true, timeout: 30000, waitFor: 1000 - }) as ScrapeResponse; + }); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response).not.toHaveProperty('content'); // v0 expect(response.markdown).toContain("_Roast_"); @@ -86,6 +98,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(response.metadata).not.toHaveProperty("pageStatusCode"); expect(response.metadata).toHaveProperty("statusCode"); expect(response.metadata).not.toHaveProperty("pageError"); + if (response.metadata !== undefined) { expect(response.metadata.error).toBeUndefined(); expect(response.metadata.title).toBe("Roast My Website"); @@ -103,16 +116,40 @@ describe('FirecrawlApp E2E Tests', () => { } }, 30000); // 30 seconds timeout + test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl( + 'https://roastmywebsite.ai', { + formats: ['screenshot@fullPage'], + }); + if 
(!response.success) { + throw new Error(response.error); + } + + expect(response).not.toBeNull(); + expect(response.screenshot).not.toBeUndefined(); + expect(response.screenshot).not.toBeNull(); + expect(response.screenshot).toContain("https://"); + }, 30000); // 30 seconds timeout + test.concurrent('should return successful response for valid scrape with PDF file', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse; + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse; + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 661ce34b..b06a037d 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -74,7 +74,7 @@ export interface FirecrawlDocument { * Defines the options and configurations available for scraping web content. 
 */ export interface CrawlScrapeOptions { - formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; + formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[]; headers?: Record<string, string>; includeTags?: string[]; excludeTags?: string[];