From e776847c71a393d9fc49f6e1883d3911170a5ba7 Mon Sep 17 00:00:00 2001 From: RutamBhagat Date: Tue, 17 Dec 2024 11:00:13 -0800 Subject: [PATCH 1/2] feat(js-sdk): improve API key handling for cloud vs self-hosted services in FirecrawlApp --- .../firecrawl/src/__tests__/index.test.ts | 27 +++++++---- .../__tests__/v1/e2e_withAuth/index.test.ts | 46 ++++++++++++++----- apps/js-sdk/firecrawl/src/index.ts | 12 +++-- 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts index 92951237..6958abf8 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts @@ -1,9 +1,9 @@ -import { describe, test, expect, jest } from '@jest/globals'; -import axios from 'axios'; -import FirecrawlApp from '../index'; +import { describe, expect, jest, test } from '@jest/globals'; -import { readFile } from 'fs/promises'; +import FirecrawlApp from '../index'; +import axios from 'axios'; import { join } from 'path'; +import { readFile } from 'fs/promises'; // Mock jest and set the type jest.mock('axios'); @@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise { return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8') } +const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev"; + describe('the firecrawl JS SDK', () => { - test('Should require an API key to instantiate FirecrawlApp', async () => { - const fn = () => { - new FirecrawlApp({ apiKey: undefined }); - }; - expect(fn).toThrow('No API key provided'); + test('Should require an API key only for cloud service', async () => { + if (API_URL.includes('api.firecrawl.dev')) { + // Should throw for cloud service + expect(() => { + new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL }); + }).toThrow('No API key provided'); + } else { + // Should not throw for self-hosted + expect(() => { + new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL }); + }).not.toThrow(); + } }); test('Should return scraped data from a /scrape API call', async () => { diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index dea55846..60d0b44f 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY; const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev"; describe('FirecrawlApp E2E Tests', () => { - test.concurrent('should throw error for no API key', async () => { - expect(() => { - new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); - }).toThrow("No API key provided"); + test.concurrent('should throw error for no API key only for cloud service', async () => { + if (API_URL.includes('api.firecrawl.dev')) { + // Should throw for cloud service + expect(() => { + new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); + }).toThrow("No API key provided"); + } else { + // Should not throw for self-hosted + expect(() => { + new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); + }).not.toThrow(); + } }); test.concurrent('should throw error for invalid API key on scrape', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); + if (API_URL.includes('api.firecrawl.dev')) { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404"); + } else { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); + } }); test.concurrent('should throw error for blocklisted URL on scrape', async () => { @@ -155,8 +168,13 @@ describe('FirecrawlApp E2E Tests', () => { }, 30000); // 30 seconds timeout test.concurrent('should throw error for invalid API key on crawl', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); + if (API_URL.includes('api.firecrawl.dev')) { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404"); + } else { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); + } }); test.concurrent('should throw error for blocklisted URL on crawl', async () => { @@ -337,8 +355,13 @@ describe('FirecrawlApp E2E Tests', () => { }, 60000); // 60 seconds timeout test.concurrent('should throw error for invalid API key on map', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); + if (API_URL.includes('api.firecrawl.dev')) { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404"); + } else { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow(); + } }); test.concurrent('should throw error for blocklisted URL on map', async () => { @@ -355,8 +378,7 @@ describe('FirecrawlApp E2E Tests', () => { }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid map', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse; + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse; expect(response).not.toBeNull(); expect(response.links?.length).toBeGreaterThan(0); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 020a2293..6d9a0a73 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -289,17 +289,23 @@ export default class FirecrawlApp { public apiKey: string; public apiUrl: string; + private isCloudService(url: string): boolean { + return url.includes('api.firecrawl.dev'); + } + /** * Initializes a new instance of the FirecrawlApp class. * @param config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { - if (typeof apiKey !== "string") { + const baseUrl = apiUrl || "https://api.firecrawl.dev"; + + if (this.isCloudService(baseUrl) && typeof apiKey !== "string") { throw new FirecrawlError("No API key provided", 401); } - this.apiKey = apiKey; - this.apiUrl = apiUrl || "https://api.firecrawl.dev"; + this.apiKey = apiKey || ''; + this.apiUrl = baseUrl; } /** From 7366f36e397669fcb4260617707f63aa38ced375 Mon Sep 17 00:00:00 2001 From: RutamBhagat Date: Sat, 21 Dec 2024 07:03:16 -0800 Subject: [PATCH 2/2] docs(CONTRIBUTING.md): Add Docker Compose setup instructions to CONTRIBUTING.md --- CONTRIBUTING.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 773454e5..ce82236d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -111,6 +111,20 @@ curl -X POST http://localhost:3002/v1/crawl \ }' ``` +### Alternative: Using Docker Compose + +For a simpler setup, you can use Docker Compose to run all services: + +1. Prerequisites: Make sure you have Docker and Docker Compose installed +2. Copy the `.env.example` file to `.env` in the `/apps/api/` directory and configure as needed +3. From the root directory, run: + +```bash +docker compose up +``` + +This will start Redis, the API server, and workers automatically in the correct configuration. + ## Tests: The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.