From 6002bf322872f1ad849bbecc0c26636e3d22b10f Mon Sep 17 00:00:00 2001 From: Thomas Kosmas Date: Thu, 19 Dec 2024 14:52:43 +0200 Subject: [PATCH 1/3] feat: dynamically import WebSocket module with error handling --- apps/js-sdk/firecrawl/src/index.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 0d19ab60..7eef05f8 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,7 +1,24 @@ import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios"; import type * as zt from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; -import { WebSocket } from "isows"; + +import type { WebSocket as IsowsWebSocket } from 'isows'; +/** + * Dynamically imports the WebSocket class from 'isows'. + * If the import fails, WebSocket is set to null. + * This approach is used because some environments, such as Firebase Functions, + * might not support WebSocket natively. + */ +const WebSocket: typeof IsowsWebSocket | null = await (async () => { + try { + const module = await import('isows'); + return module.WebSocket; + } catch (error) { + console.error("Failed to load 'isows' module:", error); + return null; + } +})(); + import { TypedEventTarget } from "typescript-event-target"; /** @@ -938,6 +955,8 @@ export class CrawlWatcher extends TypedEventTarget { constructor(id: string, app: FirecrawlApp) { super(); + if(!WebSocket) + throw new FirecrawlError("WebSocket module failed to load. Your system might not support WebSocket.", 500); this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); this.status = "scraping"; this.data = []; From c8cd0148dd86e8903a3b8cf16b87841262d3c1e6 Mon Sep 17 00:00:00 2001 From: Thomas Kosmas Date: Thu, 19 Dec 2024 20:39:30 +0200 Subject: [PATCH 2/3] refactor: remove error logging for 'isows' module import in WebSocket initialization --- apps/js-sdk/firecrawl/src/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 7eef05f8..9e3a849f 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -14,7 +14,6 @@ const WebSocket: typeof IsowsWebSocket | null = await (async () => { const module = await import('isows'); return module.WebSocket; } catch (error) { - console.error("Failed to load 'isows' module:", error); return null; } })(); From f043f5fd61d229f08dbba3f16079061a0f2cecbf Mon Sep 17 00:00:00 2001 From: Thomas Kosmas Date: Sat, 21 Dec 2024 02:27:22 +0200 Subject: [PATCH 3/3] Enhance error handling in E2E tests and introduce CrawlWatcher tests - Updated error messages in E2E tests to provide clearer feedback for blocked URLs and invalid API keys. - Added new test suite for CrawlWatcher to ensure proper instantiation and error handling when WebSocket is unavailable. - Improved test conditions for URL scraping and crawling to reflect updated error responses. --- .../__tests__/e2e_v1_withAuth/index.test.ts | 4 +-- .../src/__tests__/CrawlWatcher.test.ts | 35 +++++++++++++++++++ .../src/__tests__/e2e_withAuth/index.test.ts | 8 ++--- .../__tests__/v1/e2e_withAuth/index.test.ts | 12 +++---- 4 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index 35ee2d89..f5fc5d5d 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -58,7 +58,7 @@ describe("E2E Tests for v1 API Routes", () => { expect(response.statusCode).toBe(403); expect(response.body.error).toBe( - "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.", + "Request failed with status code 403. Error: URL is blocked intentionally. Firecrawl currently does not support scraping this site due to policy restrictions. ", ); }); @@ -757,7 +757,7 @@ describe("E2E Tests for v1 API Routes", () => { expect(response.statusCode).toBe(403); expect(response.body.error).toBe( - "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.", + "Request failed with status code 403. Error: URL is blocked intentionally. Firecrawl currently does not support scraping this site due to policy restrictions. ", ); }); diff --git a/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts b/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts new file mode 100644 index 00000000..7f53828d --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts @@ -0,0 +1,35 @@ +import { jest } from '@jest/globals'; + +describe('CrawlWatcher', () => { + const mockApiUrl = 'https://api.firecrawl.dev'; + const mockApiKey = 'test-api-key'; + + beforeEach(() => { + jest.resetModules(); + }); + + test('should create a CrawlWatcher instance successfully when isows is available', async () => { + await jest.unstable_mockModule('isows', () => ({ + WebSocket: jest.fn(), + })); + + const { default: FirecrawlApp, CrawlWatcher } = await import('../index'); + const app = new FirecrawlApp({ apiKey: mockApiKey, apiUrl: mockApiUrl }); + + const watcher = new CrawlWatcher('test-id', app); + expect(watcher).toBeInstanceOf(CrawlWatcher); + }); + + test('should throw when WebSocket is not available (isows import fails)', async () => { + await jest.unstable_mockModule('isows', () => { + throw new Error('Module not found'); + }); + + const { default: FirecrawlApp, CrawlWatcher, FirecrawlError } = await import('../index'); + const app = new FirecrawlApp({ apiKey: mockApiKey, apiUrl: mockApiUrl }); + + expect(() => { + new CrawlWatcher('test-id', app); + }).toThrow(FirecrawlError); + }); +}); diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index 7d107afe..6db51775 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -32,7 +32,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); await expect( invalidApp.scrapeUrl("https://roastmywebsite.ai") - ).rejects.toThrow("Request failed with status code 401"); + ).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401"); } ); @@ -46,7 +46,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); const blocklistedUrl = "https://facebook.com/fake-test"; await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow( - "Request failed with status code 403" + "Unexpected error occurred while trying to scrape URL. Status code: 403" ); } ); @@ -169,7 +169,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); const blocklistedUrl = "https://twitter.com/fake-test"; await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow( - "Request failed with status code 403" + "Unexpected error occurred while trying to scrape URL. Status code: 403" ); } ); @@ -242,7 +242,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { const maxChecks = 15; let checks = 0; - while (statusResponse.status === "active" && checks < maxChecks) { + while ((statusResponse.status === "active" || statusResponse.status === "scraping" ) && checks < maxChecks) { await new Promise((resolve) => setTimeout(resolve, 5000)); expect(statusResponse.partial_data).not.toBeNull(); // expect(statusResponse.current).toBeGreaterThanOrEqual(1); diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index dea55846..76dc7f73 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -17,13 +17,13 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should throw error for invalid API key on scrape', async () => { const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); + await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401"); }); test.concurrent('should throw error for blocklisted URL on scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://facebook.com/fake-test"; - await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 403"); }); test.concurrent('should return successful response with valid preview token', async () => { @@ -61,7 +61,7 @@ describe('FirecrawlApp E2E Tests', () => { 'https://roastmywebsite.ai', { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], headers: { "x-key": "test" }, - includeTags: ['h1'], + // includeTags: ['h1'], excludeTags: ['h2'], onlyMainContent: true, timeout: 30000, @@ -162,7 +162,7 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should throw error for blocklisted URL on crawl', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://twitter.com/fake-test"; - await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."); + await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403. Error: This website is no longer supported, please reach out to help@firecrawl.com for more info on how to activate it on your account. "); }); test.concurrent('should return successful response for crawl and wait for completion', async () => { @@ -212,7 +212,7 @@ describe('FirecrawlApp E2E Tests', () => { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], headers: { "x-key": "test" }, - includeTags: ['h1'], + // includeTags: ['h1'], excludeTags: ['h2'], onlyMainContent: true, waitFor: 1000 @@ -334,7 +334,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(statusResponse.data[0].metadata).not.toHaveProperty("error"); } } - }, 60000); // 60 seconds timeout + }, 120000); // 120 seconds timeout test.concurrent('should throw error for invalid API key on map', async () => { const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });