Merge branch 'main' into feat/sdk-without-ws

This commit is contained in:
Nicolas 2024-12-23 18:41:31 -03:00 committed by GitHub
commit 0c1c4f2ede
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 75 additions and 24 deletions

View File

@ -111,6 +111,20 @@ curl -X POST http://localhost:3002/v1/crawl \
}' }'
``` ```
### Alternative: Using Docker Compose
For a simpler setup, you can use Docker Compose to run all services:
1. Prerequisites: Make sure you have Docker and Docker Compose installed
2. Copy the `.env.example` file to `.env` in the `/apps/api/` directory and configure as needed
3. From the root directory, run:
```bash
docker compose up
```
This will start Redis, the API server, and workers automatically in the correct configuration.
## Tests: ## Tests:
The best way to do this is to run the tests with `npm run test:local-no-auth` if you'd like to run them without authentication. The best way to do this is to run the tests with `npm run test:local-no-auth` if you'd like to run them without authentication.

View File

@ -1,9 +1,9 @@
import { describe, test, expect, jest } from '@jest/globals'; import { describe, expect, jest, test } from '@jest/globals';
import axios from 'axios';
import FirecrawlApp from '../index';
import { readFile } from 'fs/promises'; import FirecrawlApp from '../index';
import axios from 'axios';
import { join } from 'path'; import { join } from 'path';
import { readFile } from 'fs/promises';
// Mock jest and set the type // Mock jest and set the type
jest.mock('axios'); jest.mock('axios');
@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8') return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
} }
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
describe('the firecrawl JS SDK', () => { describe('the firecrawl JS SDK', () => {
test('Should require an API key to instantiate FirecrawlApp', async () => { test('Should require an API key only for cloud service', async () => {
const fn = () => { if (API_URL.includes('api.firecrawl.dev')) {
new FirecrawlApp({ apiKey: undefined }); // Should throw for cloud service
}; expect(() => {
expect(fn).toThrow('No API key provided'); new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
}).toThrow('No API key provided');
} else {
// Should not throw for self-hosted
expect(() => {
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
}).not.toThrow();
}
}); });
test('Should return scraped data from a /scrape API call', async () => { test('Should return scraped data from a /scrape API call', async () => {

View File

@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev"; const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
describe('FirecrawlApp E2E Tests', () => { describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for no API key', async () => { test.concurrent('should throw error for no API key only for cloud service', async () => {
expect(() => { if (API_URL.includes('api.firecrawl.dev')) {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); // Should throw for cloud service
}).toThrow("No API key provided"); expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided");
} else {
// Should not throw for self-hosted
expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).not.toThrow();
}
}); });
test.concurrent('should throw error for invalid API key on scrape', async () => { test.concurrent('should throw error for invalid API key on scrape', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); if (API_URL.includes('api.firecrawl.dev')) {
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401"); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
} else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
}
}); });
test.concurrent('should throw error for blocklisted URL on scrape', async () => { test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@ -155,8 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should throw error for invalid API key on crawl', async () => { test.concurrent('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); if (API_URL.includes('api.firecrawl.dev')) {
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
} else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
}
}); });
test.concurrent('should return successful response for crawl and wait for completion', async () => { test.concurrent('should return successful response for crawl and wait for completion', async () => {
@ -331,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
}, 120000); // 120 seconds timeout }, 120000); // 120 seconds timeout
test.concurrent('should throw error for invalid API key on map', async () => { test.concurrent('should throw error for invalid API key on map', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); if (API_URL.includes('api.firecrawl.dev')) {
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
} else {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
}
}); });
test.concurrent('should throw error for blocklisted URL on map', async () => { test.concurrent('should throw error for blocklisted URL on map', async () => {
@ -349,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid map', async () => { test.concurrent('should return successful response for valid map', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.links?.length).toBeGreaterThan(0); expect(response.links?.length).toBeGreaterThan(0);

View File

@ -306,17 +306,23 @@ export default class FirecrawlApp {
public apiKey: string; public apiKey: string;
public apiUrl: string; public apiUrl: string;
/**
 * Reports whether a base URL refers to the hosted Firecrawl API.
 *
 * The decision is a plain substring match on `api.firecrawl.dev`; the URL is
 * not parsed, so the marker may appear anywhere in the string.
 *
 * @param url - Base URL to inspect.
 * @returns `true` when `url` contains `api.firecrawl.dev`, otherwise `false`.
 */
private isCloudService(url: string): boolean {
  const cloudHostMarker = 'api.firecrawl.dev';
  return url.indexOf(cloudHostMarker) !== -1;
}
/** /**
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance. * @param config - Configuration options for the FirecrawlApp instance.
*/ */
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
if (typeof apiKey !== "string") { const baseUrl = apiUrl || "https://api.firecrawl.dev";
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
throw new FirecrawlError("No API key provided", 401); throw new FirecrawlError("No API key provided", 401);
} }
this.apiKey = apiKey; this.apiKey = apiKey || '';
this.apiUrl = apiUrl || "https://api.firecrawl.dev"; this.apiUrl = baseUrl;
} }
/** /**