From b1a5625b2208ea34096096bfdd1685f9879a1d1b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 23 Dec 2024 18:45:51 -0300 Subject: [PATCH] Revert "Merge pull request #997 from mendableai/feat/sdk-without-ws" This reverts commit 53cda5f81c53d3de35925c610ce083923ca09fbe, reversing changes made to 51f79b55efadc53243a8c22d86bb2d08d878d524. --- .../src/__tests__/CrawlWatcher.test.ts | 35 ------------------- .../src/__tests__/e2e_withAuth/index.test.ts | 8 ++--- .../__tests__/v1/e2e_withAuth/index.test.ts | 8 ++--- apps/js-sdk/firecrawl/src/index.ts | 20 +---------- 4 files changed, 9 insertions(+), 62 deletions(-) delete mode 100644 apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts diff --git a/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts b/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts deleted file mode 100644 index 7f53828d..00000000 --- a/apps/js-sdk/firecrawl/src/__tests__/CrawlWatcher.test.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { jest } from '@jest/globals'; - -describe('CrawlWatcher', () => { - const mockApiUrl = 'https://api.firecrawl.dev'; - const mockApiKey = 'test-api-key'; - - beforeEach(() => { - jest.resetModules(); - }); - - test('should create a CrawlWatcher instance successfully when isows is available', async () => { - await jest.unstable_mockModule('isows', () => ({ - WebSocket: jest.fn(), - })); - - const { default: FirecrawlApp, CrawlWatcher } = await import('../index'); - const app = new FirecrawlApp({ apiKey: mockApiKey, apiUrl: mockApiUrl }); - - const watcher = new CrawlWatcher('test-id', app); - expect(watcher).toBeInstanceOf(CrawlWatcher); - }); - - test('should throw when WebSocket is not available (isows import fails)', async () => { - await jest.unstable_mockModule('isows', () => { - throw new Error('Module not found'); - }); - - const { default: FirecrawlApp, CrawlWatcher, FirecrawlError } = await import('../index'); - const app = new FirecrawlApp({ apiKey: mockApiKey, apiUrl: mockApiUrl }); - - expect(() => { - new CrawlWatcher('test-id', app); - }).toThrow(FirecrawlError); - }); -}); diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index 6db51775..7d107afe 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -32,7 +32,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); await expect( invalidApp.scrapeUrl("https://roastmywebsite.ai") - ).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401"); + ).rejects.toThrow("Request failed with status code 401"); } ); @@ -46,7 +46,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); const blocklistedUrl = "https://facebook.com/fake-test"; await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow( - "Unexpected error occurred while trying to scrape URL. Status code: 403" + "Request failed with status code 403" ); } ); @@ -169,7 +169,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { }); const blocklistedUrl = "https://twitter.com/fake-test"; await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow( - "Unexpected error occurred while trying to scrape URL. Status code: 403" + "Request failed with status code 403" ); } ); @@ -242,7 +242,7 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => { const maxChecks = 15; let checks = 0; - while ((statusResponse.status === "active" || statusResponse.status === "scraping" ) && checks < maxChecks) { + while (statusResponse.status === "active" && checks < maxChecks) { await new Promise((resolve) => setTimeout(resolve, 5000)); expect(statusResponse.partial_data).not.toBeNull(); // expect(statusResponse.current).toBeGreaterThanOrEqual(1); diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index 4f3a9cb2..e5c04209 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -36,7 +36,7 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should throw error for blocklisted URL on scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://facebook.com/fake-test"; - await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 403"); + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); }); test.concurrent('should return successful response with valid preview token', async () => { @@ -74,7 +74,7 @@ describe('FirecrawlApp E2E Tests', () => { 'https://roastmywebsite.ai', { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], headers: { "x-key": "test" }, - // includeTags: ['h1'], + includeTags: ['h1'], excludeTags: ['h2'], onlyMainContent: true, timeout: 30000, @@ -224,7 +224,7 @@ describe('FirecrawlApp E2E Tests', () => { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], headers: { "x-key": "test" }, - // includeTags: ['h1'], + includeTags: ['h1'], excludeTags: ['h2'], onlyMainContent: true, waitFor: 1000 @@ -346,7 +346,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(statusResponse.data[0].metadata).not.toHaveProperty("error"); } } - }, 120000); // 120 seconds timeout + }, 60000); // 60 seconds timeout test.concurrent('should throw error for invalid API key on map', async () => { if (API_URL.includes('api.firecrawl.dev')) { diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index feb69f03..d3ae630b 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,23 +1,7 @@ import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios"; import type * as zt from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; - -import type { WebSocket as IsowsWebSocket } from 'isows'; -/** - * Dynamically imports the WebSocket class from 'isows'. - * If the import fails, WebSocket is set to null. - * This approach is used because some environments, such as Firebase Functions, - * might not support WebSocket natively. - */ -const WebSocket: typeof IsowsWebSocket | null = await (async () => { - try { - const module = await import('isows'); - return module.WebSocket; - } catch (error) { - return null; - } -})(); - +import { WebSocket } from "isows"; import { TypedEventTarget } from "typescript-event-target"; /** @@ -961,8 +945,6 @@ export class CrawlWatcher extends TypedEventTarget { constructor(id: string, app: FirecrawlApp) { super(); - if(!WebSocket) - throw new FirecrawlError("WebSocket module failed to load. Your system might not support WebSocket.", 500); this.id = id; this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); this.status = "scraping";