diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 773454e5..ce82236d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -111,6 +111,20 @@ curl -X POST http://localhost:3002/v1/crawl \
     }'
 ```
 
+### Alternative: Using Docker Compose
+
+For a simpler setup, you can use Docker Compose to run all services:
+
+1. Prerequisites: Make sure you have Docker and Docker Compose installed
+2. Copy the `.env.example` file to `.env` in the `/apps/api/` directory and configure as needed
+3. From the root directory, run:
+
+```bash
+docker compose up
+```
+
+This will start Redis, the API server, and workers automatically in the correct configuration.
+
 ## Tests:
 
 The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
diff --git a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts
index 92951237..6958abf8 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts
@@ -1,9 +1,9 @@
-import { describe, test, expect, jest } from '@jest/globals';
-import axios from 'axios';
-import FirecrawlApp from '../index';
+import { describe, expect, jest, test } from '@jest/globals';
 
-import { readFile } from 'fs/promises';
+import FirecrawlApp from '../index';
+import axios from 'axios';
 import { join } from 'path';
+import { readFile } from 'fs/promises';
 
 // Mock jest and set the type
 jest.mock('axios');
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
   return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
 }
 
+const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
+
 describe('the firecrawl JS SDK', () => {
 
-  test('Should require an API key to instantiate FirecrawlApp', async () => {
-    const fn = () => {
-      new FirecrawlApp({ apiKey: undefined });
-    };
-    expect(fn).toThrow('No API key provided');
+  test('Should require an API key only for cloud service', async () => {
+    if (API_URL.includes('api.firecrawl.dev')) {
+      // Should throw for cloud service
+      expect(() => {
+        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
+      }).toThrow('No API key provided');
+    } else {
+      // Should not throw for self-hosted
+      expect(() => {
+        new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
+      }).not.toThrow();
+    }
   });
 
   test('Should return scraped data from a /scrape API call', async () => {
diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts
index b883550c..4f3a9cb2 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts
@@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
 const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
 
 describe('FirecrawlApp E2E Tests', () => {
-  test.concurrent('should throw error for no API key', async () => {
-    expect(() => {
-      new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
-    }).toThrow("No API key provided");
+  test.concurrent('should throw error for no API key only for cloud service', async () => {
+    if (API_URL.includes('api.firecrawl.dev')) {
+      // Should throw for cloud service
+      expect(() => {
+        new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
+      }).toThrow("No API key provided");
+    } else {
+      // Should not throw for self-hosted
+      expect(() => {
+        new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
+      }).not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for invalid API key on scrape', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@@ -155,8 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
   }, 30000); // 30 seconds timeout
 
   test.concurrent('should throw error for invalid API key on crawl', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should return successful response for crawl and wait for completion', async () => {
@@ -331,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
   }, 120000); // 120 seconds timeout
 
   test.concurrent('should throw error for invalid API key on map', async () => {
-    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-    await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
+    if (API_URL.includes('api.firecrawl.dev')) {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
+    } else {
+      const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+      await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
+    }
   });
 
   test.concurrent('should throw error for blocklisted URL on map', async () => {
diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 434949ed..feb69f03 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -306,17 +306,36 @@ export default class FirecrawlApp {
   public apiKey: string;
   public apiUrl: string;
 
+  /**
+   * Determines whether a base URL points at the hosted Firecrawl API.
+   * The parsed hostname is compared so that a self-hosted URL that merely
+   * contains "api.firecrawl.dev" in its path or query is not misclassified.
+   * @param url - Base URL the SDK sends requests to.
+   * @returns True when the URL targets the Firecrawl cloud service.
+   */
+  private isCloudService(url: string): boolean {
+    try {
+      return new URL(url).hostname.endsWith('api.firecrawl.dev');
+    } catch {
+      // Not an absolute URL (e.g. the scheme was omitted): fall back to a substring match.
+      return url.includes('api.firecrawl.dev');
+    }
+  }
+
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
+   * @throws FirecrawlError if no API key is provided when targeting the cloud service.
    */
   constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
-    if (typeof apiKey !== "string") {
+    const baseUrl = apiUrl || "https://api.firecrawl.dev";
+
+    if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
       throw new FirecrawlError("No API key provided", 401);
     }
 
-    this.apiKey = apiKey;
-    this.apiUrl = apiUrl || "https://api.firecrawl.dev";
+    this.apiKey = apiKey ?? '';
+    this.apiUrl = baseUrl;
   }
 
   /**