Merge branch 'main' into feat/queue-scrapes

This commit is contained in:
Gergő Móricz 2024-08-13 20:53:49 +02:00 committed by GitHub
commit 4a2c37dcf5
28 changed files with 1581 additions and 209 deletions

View File

@ -1,7 +1,7 @@
name: Fly Deploy Direct
on:
schedule:
- cron: '0 */6 * * *'
- cron: '0 */2 * * *'
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@ -169,6 +169,41 @@ jobs:
run: npm run test
working-directory: ./apps/js-sdk/firecrawl
go-sdk-tests:
name: Go SDK Tests
needs: pre-deploy-e2e-tests
runs-on: ubuntu-latest
services:
redis:
image: redis
ports:
- 6379:6379
steps:
- uses: actions/checkout@v3
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: "go.mod"
- name: Install pnpm
run: npm install -g pnpm
- name: Install dependencies
run: pnpm install
working-directory: ./apps/api
- name: Start the application
run: npm start &
working-directory: ./apps/api
id: start_app
- name: Start workers
run: npm run workers &
working-directory: ./apps/api
id: start_workers
- name: Install dependencies for Go SDK
run: go mod tidy
working-directory: ./apps/go-sdk
- name: Run tests for Go SDK
run: go test -v ./... -timeout 180s
working-directory: ./apps/go-sdk/firecrawl
deploy:
name: Deploy app
runs-on: ubuntu-latest

View File

@ -160,7 +160,7 @@ Errors related to connecting to Redis, such as timeouts or "Connection refused".
**Solution:**
- Ensure that the Redis service is up and running in your Docker environment.
- Verify that the REDIS_URL and REDIS_RATE_LIMIT_URL in your .env file point to the correct Redis instance.
- Verify that the REDIS_URL and REDIS_RATE_LIMIT_URL in your .env file point to the correct Redis instance, and that they match the URL used in the `docker-compose.yaml` file (`redis://redis:6379`)
- Check network settings and firewall rules that may block the connection to the Redis port.
### API endpoint does not respond

View File

@ -2,8 +2,8 @@
NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://localhost:6379
REDIS_RATE_LIMIT_URL=redis://localhost:6379
REDIS_URL=redis://redis:6379 # for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
REDIS_RATE_LIMIT_URL=redis://redis:6379 # for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
## To turn on DB authentication, you need to set up supabase.
@ -17,18 +17,29 @@ SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
# Other Optionals
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
RATE_LIMIT_TEST_API_KEY_SCRAPE= # set if you'd like to test the scraping rate limit
RATE_LIMIT_TEST_API_KEY_CRAWL= # set if you'd like to test the crawling rate limit
SCRAPING_BEE_API_KEY= # Set if you'd like to use ScrapingBee to handle JS blocking
OPENAI_API_KEY= # add for LLM dependent features (image alt generation, etc.)
BULL_AUTH_KEY= @
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
LLAMAPARSE_API_KEY= # Set if you have a llamaparse key you'd like to use to parse pdfs
SERPER_API_KEY= # Set if you have a serper key you'd like to use as a search api
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
POSTHOG_HOST= # set if you'd like to send posthog events like job logs
# use if you've set up authentication and want to test with a real API key
TEST_API_KEY=
# set if you'd like to test the scraping rate limit
RATE_LIMIT_TEST_API_KEY_SCRAPE=
# set if you'd like to test the crawling rate limit
RATE_LIMIT_TEST_API_KEY_CRAWL=
# set if you'd like to use ScrapingBee to handle JS blocking
SCRAPING_BEE_API_KEY=
# add for LLM dependent features (image alt generation, etc.)
OPENAI_API_KEY=
BULL_AUTH_KEY=@
# use if you're configuring basic logging with logtail
LOGTAIL_KEY=
# set if you have a llamaparse key you'd like to use to parse pdfs
LLAMAPARSE_API_KEY=
# set if you have a serper key you'd like to use as a search api
SERPER_API_KEY=
# set if you'd like to send slack server health status messages
SLACK_WEBHOOK_URL=
# set if you'd like to send posthog events like job logs
POSTHOG_API_KEY=
# set if you'd like to send posthog events like job logs
POSTHOG_HOST=
STRIPE_PRICE_ID_STANDARD=
STRIPE_PRICE_ID_SCALE=
@ -43,7 +54,8 @@ STRIPE_PRICE_ID_GROWTH_YEARLY=
HYPERDX_API_KEY=
HDX_NODE_BETA_MODE=1
FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta
# set if you'd like to use the fire engine closed beta
FIRE_ENGINE_BETA_URL=
# Proxy Settings for Playwright (Alternatively, you can use a proxy service like Oxylabs, which rotates IPs for you on every request)
PROXY_SERVER=

View File

@ -308,7 +308,10 @@ describe("E2E Tests for API Routes", () => {
}
}
const completedResponse = response;
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
@ -360,7 +363,10 @@ describe("E2E Tests for API Routes", () => {
}
}
const completedResponse: FirecrawlCrawlStatusResponse = response;
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
@ -478,7 +484,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body).toHaveProperty("success");
expect(response.body.success).toBe(true);
expect(response.body).toHaveProperty("data");
}, 30000); // 30 seconds timeout
}, 60000); // 60 seconds timeout
});
describe("GET /v0/crawl/status/:jobId", () => {
@ -510,7 +516,6 @@ describe("E2E Tests for API Routes", () => {
expect(crawlResponse.statusCode).toBe(200);
let isCompleted = false;
let completedResponse;
while (!isCompleted) {
const response = await request(TEST_URL)
@ -521,11 +526,16 @@ describe("E2E Tests for API Routes", () => {
if (response.body.status === "completed") {
isCompleted = true;
completedResponse = response;
} else {
await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
}
}
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
@ -616,7 +626,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("failed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data).toBeNull();
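// A failed crawl may report its data as either null or an empty array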
let isNullOrEmptyArray = false;
if (completedResponse.body.data === null || completedResponse.body.data.length === 0) {
isNullOrEmptyArray = true;
}
expect(isNullOrEmptyArray).toBe(true);
expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
expect(completedResponse.body).toHaveProperty("partial_data");
expect(completedResponse.body.partial_data[0]).toHaveProperty("content");
expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
@ -676,61 +692,4 @@ describe("E2E Tests for API Routes", () => {
expect(typeof llmExtraction.is_open_source).toBe("boolean");
}, 60000); // 60 secs
});
describe("POST /v0/crawl with fast mode", () => {
it.concurrent("should complete the crawl under 20 seconds", async () => {
const startTime = Date.now();
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://flutterbricks.com",
crawlerOptions: {
mode: "fast"
}
});
expect(crawlResponse.statusCode).toBe(200);
const jobId = crawlResponse.body.jobId;
let statusResponse;
let isFinished = false;
while (!isFinished) {
statusResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(statusResponse.statusCode).toBe(200);
isFinished = statusResponse.body.status === "completed";
if (!isFinished) {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
// const endTime = Date.now();
// const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
// console.log(`Time elapsed: ${timeElapsed} seconds`);
expect(statusResponse.body.status).toBe("completed");
expect(statusResponse.body).toHaveProperty("data");
expect(statusResponse.body.data[0]).toHaveProperty("content");
expect(statusResponse.body.data[0]).toHaveProperty("markdown");
expect(statusResponse.body.data[0]).toHaveProperty("metadata");
expect(statusResponse.body.data[0].metadata.pageStatusCode).toBe(200);
expect(statusResponse.body.data[0].metadata.pageError).toBeUndefined();
const results = statusResponse.body.data;
// results.forEach((result, i) => {
// console.log(result.metadata.sourceURL);
// });
expect(results.length).toBeGreaterThanOrEqual(10);
expect(results.length).toBeLessThanOrEqual(15);
}, 20000);
});
});

View File

@ -1,26 +1,77 @@
import { parseApi } from "../../src/lib/parseApi";
import { getRateLimiter, } from "../../src/services/rate-limiter";
import { AuthResponse, NotificationType, RateLimiterMode } from "../../src/types";
import { getRateLimiter } from "../../src/services/rate-limiter";
import {
AuthResponse,
NotificationType,
RateLimiterMode,
} from "../../src/types";
import { supabase_service } from "../../src/services/supabase";
import { withAuth } from "../../src/lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
import { sendNotification } from "../services/notification/email_notification";
import { Logger } from "../lib/logger";
import { redlock } from "../../src/services/redlock";
import { getValue } from "../../src/services/redis";
import { setValue } from "../../src/services/redis";
import { validate } from "uuid";
export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
function normalizedApiIsUuid(potentialUuid: string): boolean {
// Check if the string is a valid UUID
return validate(potentialUuid);
}
export async function authenticateUser(
req,
res,
mode?: RateLimiterMode
): Promise<AuthResponse> {
return withAuth(supaAuthenticateUser)(req, res, mode);
}
function setTrace(team_id: string, api_key: string) {
try {
setTraceAttributes({
team_id,
api_key
api_key,
});
} catch (error) {
Logger.error(`Error setting trace attributes: ${error.message}`);
}
}
async function getKeyAndPriceId(normalizedApi: string): Promise<{
success: boolean;
teamId?: string;
priceId?: string;
error?: string;
status?: number;
}> {
const { data, error } = await supabase_service.rpc("get_key_and_price_id_2", {
api_key: normalizedApi,
});
if (error) {
Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`);
return {
success: false,
error:
"The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.",
status: 500,
};
}
if (!data || data.length === 0) {
Logger.warn(`Error fetching api key: data is empty`);
// TODO: change this error code ?
return {
success: false,
error: "Unauthorized: Invalid token",
status: 401,
};
} else {
return {
success: true,
teamId: data[0].team_id,
priceId: data[0].price_id,
};
}
}
export async function supaAuthenticateUser(
req,
@ -51,20 +102,83 @@ export async function supaAuthenticateUser(
const iptoken = incomingIP + token;
let rateLimiter: RateLimiterRedis;
let subscriptionData: { team_id: string, plan: string } | null = null;
let subscriptionData: { team_id: string; plan: string } | null = null;
let normalizedApi: string;
let team_id: string;
let cacheKey = "";
let redLockKey = "";
const lockTTL = 15000; // 15 seconds
let teamId: string | null = null;
let priceId: string | null = null;
if (token == "this_is_just_a_preview_token") {
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
team_id = "preview";
teamId = "preview";
} else {
normalizedApi = parseApi(token);
if (!normalizedApiIsUuid(normalizedApi)) {
return {
success: false,
error: "Unauthorized: Invalid token",
status: 401,
};
}
cacheKey = `api_key:${normalizedApi}`;
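// Check Redis for a cached team_id/price_id pair before falling back to the database RPC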
try {
const teamIdPriceId = await getValue(cacheKey);
if (teamIdPriceId) {
const { team_id, price_id } = JSON.parse(teamIdPriceId);
teamId = team_id;
priceId = price_id;
} else {
const {
success,
teamId: tId,
priceId: pId,
error,
status,
} = await getKeyAndPriceId(normalizedApi);
if (!success) {
return { success, error, status };
}
teamId = tId;
priceId = pId;
await setValue(
cacheKey,
JSON.stringify({ team_id: teamId, price_id: priceId }),
10
);
}
} catch (error) {
Logger.error(`Error with auth function: ${error.message}`);
// const {
// success,
// teamId: tId,
// priceId: pId,
// error: e,
// status,
// } = await getKeyAndPriceId(normalizedApi);
// if (!success) {
// return { success, error: e, status };
// }
// teamId = tId;
// priceId = pId;
}
const { data, error } = await supabase_service.rpc(
'get_key_and_price_id_2', { api_key: normalizedApi }
);
// get_key_and_price_id_2 rpc definition:
// create or replace function get_key_and_price_id_2(api_key uuid)
// returns table(key uuid, team_id uuid, price_id text) as $$
@ -82,46 +196,39 @@ export async function supaAuthenticateUser(
// end;
// $$ language plpgsql;
if (error) {
Logger.warn(`Error fetching key and price_id: ${error.message}`);
} else {
// console.log('Key and Price ID:', data);
}
if (error || !data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
return {
success: false,
error: "Unauthorized: Invalid token",
status: 401,
};
}
const internal_team_id = data[0].team_id;
team_id = internal_team_id;
const plan = getPlanByPriceId(data[0].price_id);
const plan = getPlanByPriceId(priceId);
// HyperDX Logging
setTrace(team_id, normalizedApi);
setTrace(teamId, normalizedApi);
subscriptionData = {
team_id: team_id,
plan: plan
}
team_id: teamId,
plan: plan,
};
switch (mode) {
case RateLimiterMode.Crawl:
rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token, subscriptionData.plan);
rateLimiter = getRateLimiter(
RateLimiterMode.Crawl,
token,
subscriptionData.plan
);
break;
case RateLimiterMode.Scrape:
rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan);
rateLimiter = getRateLimiter(
RateLimiterMode.Scrape,
token,
subscriptionData.plan
);
break;
case RateLimiterMode.Search:
rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan);
rateLimiter = getRateLimiter(
RateLimiterMode.Search,
token,
subscriptionData.plan
);
break;
case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break;
case RateLimiterMode.Preview:
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
break;
@ -134,7 +241,8 @@ export async function supaAuthenticateUser(
}
}
const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : team_id;
const team_endpoint_token =
token === "this_is_just_a_preview_token" ? iptoken : teamId;
try {
await rateLimiter.consume(team_endpoint_token);
@ -147,7 +255,17 @@ export async function supaAuthenticateUser(
const startDate = new Date();
const endDate = new Date();
endDate.setDate(endDate.getDate() + 7);
// await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString());
// Cache longer for 429s
if (teamId && priceId && mode !== RateLimiterMode.Preview) {
await setValue(
cacheKey,
JSON.stringify({ team_id: teamId, price_id: priceId }),
60 // 60 seconds, cache for everything
);
}
return {
success: false,
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
@ -157,7 +275,9 @@ export async function supaAuthenticateUser(
if (
token === "this_is_just_a_preview_token" &&
(mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search)
(mode === RateLimiterMode.Scrape ||
mode === RateLimiterMode.Preview ||
mode === RateLimiterMode.Search)
) {
return { success: true, team_id: "preview" };
// check the origin of the request and make sure its from firecrawl.dev
@ -181,8 +301,6 @@ export async function supaAuthenticateUser(
.select("*")
.eq("key", normalizedApi);
if (error || !data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
return {
@ -195,26 +313,30 @@ export async function supaAuthenticateUser(
subscriptionData = data[0];
}
return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? ""};
return {
success: true,
team_id: subscriptionData.team_id,
plan: subscriptionData.plan ?? "",
};
}
function getPlanByPriceId(price_id: string) {
switch (price_id) {
case process.env.STRIPE_PRICE_ID_STARTER:
return 'starter';
return "starter";
case process.env.STRIPE_PRICE_ID_STANDARD:
return 'standard';
return "standard";
case process.env.STRIPE_PRICE_ID_SCALE:
return 'scale';
return "scale";
case process.env.STRIPE_PRICE_ID_HOBBY:
case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
return 'hobby';
return "hobby";
case process.env.STRIPE_PRICE_ID_STANDARD_NEW:
case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
return 'standardnew';
return "standardnew";
case process.env.STRIPE_PRICE_ID_GROWTH:
case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
return 'growth';
return "growth";
default:
return 'free';
return "free";
}
}
}

View File

@ -80,7 +80,7 @@ export async function searchHelper(
await a.setOptions({
jobId,
mode: "single_urls",
urls: res.map((r) => r.url).slice(0, searchOptions.limit ?? 7),
urls: res.map((r) => r.url).slice(0, Math.min(searchOptions.limit ?? 5, 5)),
crawlerOptions: {
...crawlerOptions,
},
@ -150,7 +150,8 @@ export async function searchController(req: Request, res: Response) {
};
const origin = req.body.origin ?? "api";
const searchOptions = req.body.searchOptions ?? { limit: 7 };
const searchOptions = req.body.searchOptions ?? { limit: 5 };
const jobId = uuidv4();

View File

@ -189,3 +189,4 @@ if (cluster.isMaster) {
// wsq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
// wsq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
// wsq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));

View File

@ -168,11 +168,29 @@ export class WebScraperDataProvider {
private async handleCrawlMode(
inProgress?: (progress: Progress) => void
): Promise<Document[]> {
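// Normalize includes/excludes, which may be provided as a string[] or as a comma-separated string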
let includes: string[];
if (Array.isArray(this.includes)) {
if (this.includes[0] != "") {
includes = this.includes;
}
} else {
includes = this.includes.split(',');
}
let excludes: string[];
if (Array.isArray(this.excludes)) {
if (this.excludes[0] != "") {
excludes = this.excludes;
}
} else {
excludes = this.excludes.split(',');
}
const crawler = new WebCrawler({
jobId: this.jobId,
initialUrl: this.urls[0],
includes: Array.isArray(this.includes) ? this.includes : this.includes.split(','),
excludes: Array.isArray(this.excludes) ? this.excludes : this.excludes.split(','),
includes,
excludes,
maxCrawledLinks: this.maxCrawledLinks,
maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth),
limit: this.limit,

View File

@ -1,24 +1,11 @@
export const urlSpecificParams = {
"platform.openai.com": {
params: {
wait_browser: "networkidle2",
block_resources: false,
},
headers: {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
"sec-fetch-site": "same-origin",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
referer: "https://www.google.com/",
"accept-language": "en-US,en;q=0.9",
"accept-encoding": "gzip, deflate, br",
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
},
cookies: {
__cf_bm:
"mC1On8P2GWT3A5UeSYH6z_MP94xcTAdZ5jfNi9IT2U0-1714327136-1.0.1.1-ILAP5pSX_Oo9PPo2iHEYCYX.p9a0yRBNLr58GHyrzYNDJ537xYpG50MXxUYVdfrD.h3FV5O7oMlRKGA0scbxaQ",
defaultScraper: "fire-engine",
params:{
wait: 3000,
fireEngineOptions:{
engine: "chrome-cdp"
},
},
},
"support.greenpay.me":{
@ -240,4 +227,12 @@ export const urlSpecificParams = {
},
},
},
"digikey.com":{
defaultScraper: "fire-engine",
params:{
fireEngineOptions:{
engine: "tlsclient",
},
},
}
};

View File

@ -4,37 +4,12 @@ import { sendNotification } from "../notification/email_notification";
import { supabase_service } from "../supabase";
import { Logger } from "../../lib/logger";
import { getValue, setValue } from "../redis";
import Redlock from "redlock";
import Client from "ioredis";
import { redlock } from "../redlock";
const FREE_CREDITS = 500;
const redlock = new Redlock(
// You should have one client for each independent redis node
// or cluster.
[new Client(process.env.REDIS_RATE_LIMIT_URL)],
{
// The expected clock drift; for more details see:
// http://redis.io/topics/distlock
driftFactor: 0.01, // multiplied by lock ttl to determine drift time
// The max number of times Redlock will attempt to lock a resource
// before erroring.
retryCount: 5,
// the time in ms between attempts
retryDelay: 100, // time in ms
// the max time in ms randomly added to retries
// to improve performance under high contention
// see https://www.awsarchitectureblog.com/2015/03/backoff.html
retryJitter: 200, // time in ms
// The minimum remaining time on a lock before an extension is automatically
// attempted with the `using` API.
automaticExtensionThreshold: 500, // time in ms
}
);
export async function billTeam(team_id: string, credits: number) {
return withAuth(supaBillTeam)(team_id, credits);
}

View File

@ -0,0 +1,29 @@
import Redlock from "redlock";
import Client from "ioredis";
export const redlock = new Redlock(
// You should have one client for each independent redis node
// or cluster.
[new Client(process.env.REDIS_RATE_LIMIT_URL)],
{
// The expected clock drift; for more details see:
// http://redis.io/topics/distlock
driftFactor: 0.01, // multiplied by lock ttl to determine drift time
// The max number of times Redlock will attempt to lock a resource
// before erroring.
retryCount: 5,
// the time in ms between attempts
retryDelay: 100, // time in ms
// the max time in ms randomly added to retries
// to improve performance under high contention
// see https://www.awsarchitectureblog.com/2015/03/backoff.html
retryJitter: 200, // time in ms
// The minimum remaining time on a lock before an extension is automatically
// attempted with the `using` API.
automaticExtensionThreshold: 500, // time in ms
}
);

View File

@ -36,17 +36,9 @@ export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
Logger.debug(
"Attempted to access Supabase client when it's not configured."
);
}
const client = target.getClient();
// If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
if (client === null) {
Logger.error(
"Attempted to access Supabase client when it's not configured."
);
return () => {
throw new Error("Supabase client is not configured.");
};

apps/go-sdk/.env.example Normal file
View File

@ -0,0 +1,2 @@
API_URL=http://localhost:3002
TEST_API_KEY=fc-YOUR-API-KEY

apps/go-sdk/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Sideguide Technologies Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

apps/go-sdk/README.md Normal file
View File

@ -0,0 +1,189 @@
# Firecrawl Go SDK
The Firecrawl Go SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
## Installation
To install the Firecrawl Go SDK, you can use `go get`:
```bash
go get github.com/mendableai/firecrawl
```
## Usage
1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter when creating the `FirecrawlApp`.
Here's an example of how to use the SDK with error handling:
```go
import (
"fmt"
"log"
"github.com/mendableai/firecrawl/firecrawl"
)
func main() {
// Initialize the FirecrawlApp with your API key
app, err := firecrawl.NewFirecrawlApp("YOUR_API_KEY")
if err != nil {
log.Fatalf("Failed to initialize FirecrawlApp: %v", err)
}
// Scrape a single URL
url := "https://mendable.ai"
scrapedData, err := app.ScrapeURL(url, nil)
if err != nil {
log.Fatalf("Error occurred while scraping: %v", err)
}
fmt.Println(scrapedData)
// Crawl a website
crawlUrl := "https://mendable.ai"
params := map[string]any{
"pageOptions": map[string]any{
"onlyMainContent": true,
},
}
crawlResult, err := app.CrawlURL(crawlUrl, params)
if err != nil {
log.Fatalf("Error occurred while crawling: %v", err)
}
fmt.Println(crawlResult)
}
```
### Scraping a URL
To scrape a single URL with error handling, use the `ScrapeURL` method. It takes the URL as a parameter and returns the scraped document.
```go
url := "https://mendable.ai"
scrapedData, err := app.ScrapeURL(url, nil)
if err != nil {
log.Fatalf("Failed to scrape URL: %v", err)
}
fmt.Println(scrapedData)
```
### Extracting structured data from a URL
With LLM extraction, you can easily extract structured data from any URL. Here is how to use it:
```go
jsonSchema := map[string]any{
"type": "object",
"properties": map[string]any{
"top": map[string]any{
"type": "array",
"items": map[string]any{
"type": "object",
"properties": map[string]any{
"title": map[string]string{"type": "string"},
"points": map[string]string{"type": "number"},
"by": map[string]string{"type": "string"},
"commentsURL": map[string]string{"type": "string"},
},
"required": []string{"title", "points", "by", "commentsURL"},
},
"minItems": 5,
"maxItems": 5,
"description": "Top 5 stories on Hacker News",
},
},
"required": []string{"top"},
}
llmExtractionParams := map[string]any{
"extractorOptions": firecrawl.ExtractorOptions{
ExtractionSchema: jsonSchema,
},
}
scrapeResult, err := app.ScrapeURL("https://news.ycombinator.com", llmExtractionParams)
if err != nil {
log.Fatalf("Failed to perform LLM extraction: %v", err)
}
fmt.Println(scrapeResult)
```
### Search for a query
To search the web, get the most relevant results, scrape each page, and return the markdown, use the `Search` method. The method takes the query as a parameter and returns the search results.
```go
query := "what is mendable?"
searchResult, err := app.Search(query)
if err != nil {
log.Fatalf("Failed to search: %v", err)
}
fmt.Println(searchResult)
```
### Crawling a Website
To crawl a website, use the `CrawlURL` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
```go
crawlParams := map[string]any{
"crawlerOptions": map[string]any{
"excludes": []string{"blog/*"},
"includes": []string{}, // leave empty for all pages
"limit": 1000,
},
"pageOptions": map[string]any{
"onlyMainContent": true,
},
}
crawlResult, err := app.CrawlURL("mendable.ai", crawlParams, true, 2, idempotencyKey)
if err != nil {
log.Fatalf("Failed to crawl URL: %v", err)
}
fmt.Println(crawlResult)
```
### Checking Crawl Status
To check the status of a crawl job, use the `CheckCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
```go
status, err := app.CheckCrawlStatus(jobId)
if err != nil {
log.Fatalf("Failed to check crawl status: %v", err)
}
fmt.Println(status)
```
### Canceling a Crawl Job
To cancel a crawl job, use the `CancelCrawlJob` method. It takes the job ID as a parameter and returns the cancellation status of the crawl job.
```go
canceled, err := app.CancelCrawlJob(jobId)
if err != nil {
log.Fatalf("Failed to cancel crawl job: %v", err)
}
fmt.Println(canceled)
```
## Error Handling
The SDK handles errors returned by the Firecrawl API and surfaces them as Go `error` values. If an error occurs during a request, the returned error carries a descriptive message.
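For example, a minimal sketch of branching on a returned error (the URL is a placeholder; the message format in the comment follows the SDK's `handleError` wording):
```go
package main

import (
	"fmt"
	"log"

	"github.com/mendableai/firecrawl/firecrawl"
)

func main() {
	app, err := firecrawl.NewFirecrawlApp("fc-YOUR-API-KEY", "")
	if err != nil {
		log.Fatalf("Failed to initialize FirecrawlApp: %v", err)
	}

	scrapedData, err := app.ScrapeURL("https://example.com", nil)
	if err != nil {
		// The error message includes the HTTP status and the API's details,
		// e.g. "Payment Required: Failed to scrape URL. <details>".
		log.Fatalf("Error occurred while scraping: %v", err)
	}
	fmt.Println(scrapedData.Content)
}
```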
## Contributing
Contributions to the Firecrawl Go SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
## License
The Firecrawl Go SDK is licensed under the MIT License. This means you are free to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the SDK, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Please note that while this SDK is MIT licensed, it is part of a larger project which may be under different licensing terms. Always refer to the license information in the root directory of the main project for overall licensing details.

View File

@ -0,0 +1,87 @@
package main
import (
"encoding/json"
"fmt"
"log"
"github.com/google/uuid"
"github.com/mendableai/firecrawl/firecrawl"
)
func main() {
app, err := firecrawl.NewFirecrawlApp("fc-YOUR-API-KEY", "http://localhost:3002")
if err != nil {
log.Fatalf("Failed to create FirecrawlApp: %v", err)
}
// Scrape a website
scrapeResult, err := app.ScrapeURL("firecrawl.dev", nil)
if err != nil {
log.Fatalf("Failed to scrape URL: %v", err)
}
fmt.Println(scrapeResult.Markdown)
// Crawl a website
idempotencyKey := uuid.New().String() // optional idempotency key
crawlParams := map[string]any{
"crawlerOptions": map[string]any{
"excludes": []string{"blog/*"},
},
}
crawlResult, err := app.CrawlURL("mendable.ai", crawlParams, true, 2, idempotencyKey)
if err != nil {
log.Fatalf("Failed to crawl URL: %v", err)
}
jsonCrawlResult, err := json.MarshalIndent(crawlResult, "", " ")
if err != nil {
log.Fatalf("Failed to marshal crawl result: %v", err)
}
fmt.Println(string(jsonCrawlResult))
// LLM Extraction using JSON schema
jsonSchema := map[string]any{
"type": "object",
"properties": map[string]any{
"top": map[string]any{
"type": "array",
"items": map[string]any{
"type": "object",
"properties": map[string]any{
"title": map[string]string{"type": "string"},
"points": map[string]string{"type": "number"},
"by": map[string]string{"type": "string"},
"commentsURL": map[string]string{"type": "string"},
},
"required": []string{"title", "points", "by", "commentsURL"},
},
"minItems": 5,
"maxItems": 5,
"description": "Top 5 stories on Hacker News",
},
},
"required": []string{"top"},
}
llmExtractionParams := map[string]any{
"extractorOptions": firecrawl.ExtractorOptions{
ExtractionSchema: jsonSchema,
Mode: "llm-extraction",
},
"pageOptions": map[string]any{
"onlyMainContent": true,
},
}
llmExtractionResult, err := app.ScrapeURL("https://news.ycombinator.com", llmExtractionParams)
if err != nil {
log.Fatalf("Failed to perform LLM extraction: %v", err)
}
// Pretty print the LLM extraction result
jsonResult, err := json.MarshalIndent(llmExtractionResult.LLMExtraction, "", " ")
if err != nil {
log.Fatalf("Failed to marshal LLM extraction result: %v", err)
}
fmt.Println(string(jsonResult))
}

View File

@ -0,0 +1,10 @@
module github.com/mendableai/firecrawl/apps/go-sdk/examples
go 1.22.5
replace github.com/mendableai/firecrawl => ../
require (
github.com/google/uuid v1.6.0
github.com/mendableai/firecrawl v0.0.0-00010101000000-000000000000
)

View File

@ -0,0 +1,12 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -0,0 +1,584 @@
// Package firecrawl provides a client for interacting with the Firecrawl API.
package firecrawl
import (
"bytes"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"os"
"time"
)
// FirecrawlDocumentMetadata represents metadata for a Firecrawl document
type FirecrawlDocumentMetadata struct {
Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"`
Language string `json:"language,omitempty"`
Keywords string `json:"keywords,omitempty"`
Robots string `json:"robots,omitempty"`
OGTitle string `json:"ogTitle,omitempty"`
OGDescription string `json:"ogDescription,omitempty"`
OGURL string `json:"ogUrl,omitempty"`
OGImage string `json:"ogImage,omitempty"`
OGAudio string `json:"ogAudio,omitempty"`
OGDeterminer string `json:"ogDeterminer,omitempty"`
OGLocale string `json:"ogLocale,omitempty"`
OGLocaleAlternate []string `json:"ogLocaleAlternate,omitempty"`
OGSiteName string `json:"ogSiteName,omitempty"`
OGVideo string `json:"ogVideo,omitempty"`
DCTermsCreated string `json:"dctermsCreated,omitempty"`
DCDateCreated string `json:"dcDateCreated,omitempty"`
DCDate string `json:"dcDate,omitempty"`
DCTermsType string `json:"dctermsType,omitempty"`
DCType string `json:"dcType,omitempty"`
DCTermsAudience string `json:"dctermsAudience,omitempty"`
DCTermsSubject string `json:"dctermsSubject,omitempty"`
DCSubject string `json:"dcSubject,omitempty"`
DCDescription string `json:"dcDescription,omitempty"`
DCTermsKeywords string `json:"dctermsKeywords,omitempty"`
ModifiedTime string `json:"modifiedTime,omitempty"`
PublishedTime string `json:"publishedTime,omitempty"`
ArticleTag string `json:"articleTag,omitempty"`
ArticleSection string `json:"articleSection,omitempty"`
SourceURL string `json:"sourceURL,omitempty"`
PageStatusCode int `json:"pageStatusCode,omitempty"`
PageError string `json:"pageError,omitempty"`
}
// FirecrawlDocument represents a document in Firecrawl
type FirecrawlDocument struct {
ID string `json:"id,omitempty"`
URL string `json:"url,omitempty"`
Content string `json:"content"`
Markdown string `json:"markdown,omitempty"`
HTML string `json:"html,omitempty"`
LLMExtraction map[string]any `json:"llm_extraction,omitempty"`
CreatedAt *time.Time `json:"createdAt,omitempty"`
UpdatedAt *time.Time `json:"updatedAt,omitempty"`
Type string `json:"type,omitempty"`
Metadata *FirecrawlDocumentMetadata `json:"metadata,omitempty"`
ChildrenLinks []string `json:"childrenLinks,omitempty"`
Provider string `json:"provider,omitempty"`
Warning string `json:"warning,omitempty"`
Index int `json:"index,omitempty"`
}
// ExtractorOptions represents options for extraction.
type ExtractorOptions struct {
Mode string `json:"mode,omitempty"`
ExtractionPrompt string `json:"extractionPrompt,omitempty"`
ExtractionSchema any `json:"extractionSchema,omitempty"`
}
// ScrapeResponse represents the response for scraping operations
type ScrapeResponse struct {
Success bool `json:"success"`
Data *FirecrawlDocument `json:"data,omitempty"`
}
// SearchResponse represents the response for searching operations
type SearchResponse struct {
Success bool `json:"success"`
Data []*FirecrawlDocument `json:"data,omitempty"`
}
// CrawlResponse represents the response for crawling operations
type CrawlResponse struct {
Success bool `json:"success"`
JobID string `json:"jobId,omitempty"`
Data []*FirecrawlDocument `json:"data,omitempty"`
}
// JobStatusResponse represents the response for checking crawl job status
type JobStatusResponse struct {
Success bool `json:"success"`
Status string `json:"status"`
Current int `json:"current,omitempty"`
CurrentURL string `json:"current_url,omitempty"`
CurrentStep string `json:"current_step,omitempty"`
Total int `json:"total,omitempty"`
JobID string `json:"jobId,omitempty"`
Data []*FirecrawlDocument `json:"data,omitempty"`
PartialData []*FirecrawlDocument `json:"partial_data,omitempty"`
}
// CancelCrawlJobResponse represents the response for canceling a crawl job
type CancelCrawlJobResponse struct {
Success bool `json:"success"`
Status string `json:"status"`
}
// requestOptions represents options for making requests.
type requestOptions struct {
retries int
backoff int
}
// requestOption is a functional option type for requestOptions.
type requestOption func(*requestOptions)
// newRequestOptions creates a new requestOptions instance with the provided options.
//
// Parameters:
// - opts: Optional request options.
//
// Returns:
// - *requestOptions: A new instance of requestOptions with the provided options.
func newRequestOptions(opts ...requestOption) *requestOptions {
options := &requestOptions{retries: 1}
for _, opt := range opts {
opt(options)
}
return options
}
// withRetries sets the number of retries for a request.
//
// Parameters:
// - retries: The number of retries to be performed.
//
// Returns:
// - requestOption: A functional option that sets the number of retries for a request.
func withRetries(retries int) requestOption {
return func(opts *requestOptions) {
opts.retries = retries
}
}
// withBackoff sets the backoff interval for a request.
//
// Parameters:
// - backoff: The backoff interval (in milliseconds) to be used for retries.
//
// Returns:
// - requestOption: A functional option that sets the backoff interval for a request.
func withBackoff(backoff int) requestOption {
return func(opts *requestOptions) {
opts.backoff = backoff
}
}
// FirecrawlApp represents a client for the Firecrawl API.
type FirecrawlApp struct {
APIKey string
APIURL string
Client *http.Client
}
// NewFirecrawlApp creates a new instance of FirecrawlApp with the provided API key and API URL.
// If the API key or API URL is not provided, it attempts to retrieve them from environment variables.
// If the API key is still not found, it returns an error.
//
// Parameters:
// - apiKey: The API key for authenticating with the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_KEY environment variable.
// - apiURL: The base URL for the Firecrawl API. If empty, it will be retrieved from the FIRECRAWL_API_URL environment variable, defaulting to "https://api.firecrawl.dev".
//
// Returns:
// - *FirecrawlApp: A new instance of FirecrawlApp configured with the provided or retrieved API key and API URL.
// - error: An error if the API key is not provided or retrieved.
func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) {
if apiKey == "" {
apiKey = os.Getenv("FIRECRAWL_API_KEY")
if apiKey == "" {
return nil, fmt.Errorf("no API key provided")
}
}
if apiURL == "" {
apiURL = os.Getenv("FIRECRAWL_API_URL")
if apiURL == "" {
apiURL = "https://api.firecrawl.dev"
}
}
client := &http.Client{
Timeout: 60 * time.Second,
}
return &FirecrawlApp{
APIKey: apiKey,
APIURL: apiURL,
Client: client,
}, nil
}
// ScrapeURL scrapes the content of the specified URL using the Firecrawl API.
//
// Parameters:
// - url: The URL to be scraped.
// - params: Optional parameters for the scrape request, including extractor options for LLM extraction.
//
// Returns:
// - *FirecrawlDocument: The scraped document data.
// - error: An error if the scrape request fails.
func (app *FirecrawlApp) ScrapeURL(url string, params map[string]any) (*FirecrawlDocument, error) {
headers := app.prepareHeaders("")
scrapeBody := map[string]any{"url": url}
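// If typed ExtractorOptions are passed, resolve any schema provider and default the mode to "llm-extraction" before sending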
if params != nil {
if extractorOptions, ok := params["extractorOptions"].(ExtractorOptions); ok {
if schema, ok := extractorOptions.ExtractionSchema.(interface{ schema() any }); ok {
extractorOptions.ExtractionSchema = schema.schema()
}
if extractorOptions.Mode == "" {
extractorOptions.Mode = "llm-extraction"
}
scrapeBody["extractorOptions"] = extractorOptions
}
for key, value := range params {
if key != "extractorOptions" {
scrapeBody[key] = value
}
}
}
resp, err := app.makeRequest(
http.MethodPost,
fmt.Sprintf("%s/v0/scrape", app.APIURL),
scrapeBody,
headers,
"scrape URL",
)
if err != nil {
return nil, err
}
var scrapeResponse ScrapeResponse
err = json.Unmarshal(resp, &scrapeResponse)
if err != nil {
return nil, err
}
if scrapeResponse.Success {
return scrapeResponse.Data, nil
}
return nil, fmt.Errorf("failed to scrape URL")
}
// Search performs a search query using the Firecrawl API and returns the search results.
//
// Parameters:
// - query: The search query string.
// - params: Optional parameters for the search request.
//
// Returns:
// - []*FirecrawlDocument: A slice of FirecrawlDocument containing the search results.
// - error: An error if the search request fails.
func (app *FirecrawlApp) Search(query string, params map[string]any) ([]*FirecrawlDocument, error) {
headers := app.prepareHeaders("")
searchBody := map[string]any{"query": query}
for k, v := range params {
searchBody[k] = v
}
resp, err := app.makeRequest(
http.MethodPost,
fmt.Sprintf("%s/v0/search", app.APIURL),
searchBody,
headers,
"search",
)
if err != nil {
return nil, err
}
var searchResponse SearchResponse
err = json.Unmarshal(resp, &searchResponse)
if err != nil {
return nil, err
}
if searchResponse.Success {
return searchResponse.Data, nil
}
return nil, fmt.Errorf("failed to search")
}
// CrawlURL starts a crawl job for the specified URL using the Firecrawl API.
//
// Parameters:
// - url: The URL to crawl.
// - params: Optional parameters for the crawl request.
// - waitUntilDone: If true, the method will wait until the crawl job is completed before returning.
// - pollInterval: The interval (in seconds) at which to poll the job status if waitUntilDone is true.
// - idempotencyKey: An optional idempotency key to ensure the request is idempotent.
//
// Returns:
// - any: The job ID if waitUntilDone is false, or the crawl result if waitUntilDone is true.
// - error: An error if the crawl request fails.
func (app *FirecrawlApp) CrawlURL(url string, params map[string]any, waitUntilDone bool, pollInterval int, idempotencyKey string) (any, error) {
headers := app.prepareHeaders(idempotencyKey)
crawlBody := map[string]any{"url": url}
for k, v := range params {
crawlBody[k] = v
}
resp, err := app.makeRequest(
http.MethodPost,
fmt.Sprintf("%s/v0/crawl", app.APIURL),
crawlBody,
headers,
"start crawl job",
withRetries(3),
withBackoff(500),
)
if err != nil {
return nil, err
}
var crawlResponse CrawlResponse
err = json.Unmarshal(resp, &crawlResponse)
if err != nil {
return nil, err
}
if waitUntilDone {
return app.monitorJobStatus(crawlResponse.JobID, headers, pollInterval)
}
if crawlResponse.JobID == "" {
return nil, fmt.Errorf("failed to get job ID")
}
return crawlResponse.JobID, nil
}
// CheckCrawlStatus checks the status of a crawl job using the Firecrawl API.
//
// Parameters:
// - jobID: The ID of the crawl job to check.
//
// Returns:
// - *JobStatusResponse: The status of the crawl job.
// - error: An error if the crawl status check request fails.
func (app *FirecrawlApp) CheckCrawlStatus(jobID string) (*JobStatusResponse, error) {
headers := app.prepareHeaders("")
resp, err := app.makeRequest(
http.MethodGet,
fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID),
nil,
headers,
"check crawl status",
withRetries(3),
withBackoff(500),
)
if err != nil {
return nil, err
}
var jobStatusResponse JobStatusResponse
err = json.Unmarshal(resp, &jobStatusResponse)
if err != nil {
return nil, err
}
return &jobStatusResponse, nil
}
// CancelCrawlJob cancels a crawl job using the Firecrawl API.
//
// Parameters:
// - jobID: The ID of the crawl job to cancel.
//
// Returns:
// - string: The status of the crawl job after cancellation.
// - error: An error if the crawl job cancellation request fails.
func (app *FirecrawlApp) CancelCrawlJob(jobID string) (string, error) {
headers := app.prepareHeaders("")
resp, err := app.makeRequest(
http.MethodDelete,
fmt.Sprintf("%s/v0/crawl/cancel/%s", app.APIURL, jobID),
nil,
headers,
"cancel crawl job",
)
if err != nil {
return "", err
}
var cancelCrawlJobResponse CancelCrawlJobResponse
err = json.Unmarshal(resp, &cancelCrawlJobResponse)
if err != nil {
return "", err
}
return cancelCrawlJobResponse.Status, nil
}
// prepareHeaders prepares the headers for an HTTP request.
//
// Parameters:
// - idempotencyKey: A string representing the idempotency key to be included in the headers.
// If the idempotency key is an empty string, it will not be included in the headers.
//
// Returns:
// - map[string]string: A map containing the headers for the HTTP request.
func (app *FirecrawlApp) prepareHeaders(idempotencyKey string) map[string]string {
headers := map[string]string{
"Content-Type": "application/json",
"Authorization": fmt.Sprintf("Bearer %s", app.APIKey),
}
if idempotencyKey != "" {
headers["x-idempotency-key"] = idempotencyKey
}
return headers
}
// makeRequest makes a request to the specified URL with the provided method, data, headers, and options.
//
// Parameters:
// - method: The HTTP method to use for the request (e.g., "GET", "POST", "DELETE").
// - url: The URL to send the request to.
// - data: The data to be sent in the request body.
// - headers: The headers to be included in the request.
// - action: A string describing the action being performed.
// - opts: Optional request options.
//
// Returns:
// - []byte: The response body from the request.
// - error: An error if the request fails.
func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, headers map[string]string, action string, opts ...requestOption) ([]byte, error) {
var body []byte
var err error
if data != nil {
body, err = json.Marshal(data)
if err != nil {
return nil, err
}
}
req, err := http.NewRequest(method, url, bytes.NewBuffer(body))
if err != nil {
return nil, err
}
for key, value := range headers {
req.Header.Set(key, value)
}
var resp *http.Response
options := newRequestOptions(opts...)
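// Retry while the API returns 502, sleeping backoff * 2^attempt milliseconds between tries; any other status breaks out immediately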
for i := 0; i < options.retries; i++ {
resp, err = app.Client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 502 {
break
}
time.Sleep(time.Duration(math.Pow(2, float64(i))) * time.Duration(options.backoff) * time.Millisecond)
}
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
statusCode := resp.StatusCode
if statusCode != 200 {
return nil, app.handleError(statusCode, respBody, action)
}
return respBody, nil
}
// monitorJobStatus monitors the status of a crawl job using the Firecrawl API.
//
// Parameters:
// - jobID: The ID of the crawl job to monitor.
// - headers: The headers to be included in the request.
// - pollInterval: The interval (in seconds) at which to poll the job status.
//
// Returns:
// - []*FirecrawlDocument: The crawl result if the job is completed.
// - error: An error if the crawl status check request fails.
func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) {
attempts := 0
for {
resp, err := app.makeRequest(
http.MethodGet,
fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID),
nil,
headers,
"check crawl status",
withRetries(3),
withBackoff(500),
)
if err != nil {
return nil, err
}
var statusData JobStatusResponse
err = json.Unmarshal(resp, &statusData)
if err != nil {
return nil, err
}
status := statusData.Status
if status == "" {
return nil, fmt.Errorf("invalid status in response")
}
if status == "completed" {
if statusData.Data != nil {
return statusData.Data, nil
}
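// A job can report "completed" before its data is persisted; allow a few extra polls before giving up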
attempts++
if attempts > 3 {
return nil, fmt.Errorf("crawl job completed but no data was returned")
}
} else if status == "active" || status == "paused" || status == "pending" || status == "queued" || status == "waiting" {
pollInterval = max(pollInterval, 2)
time.Sleep(time.Duration(pollInterval) * time.Second)
} else {
return nil, fmt.Errorf("crawl job failed or was stopped. Status: %s", status)
}
}
}
// handleError handles errors returned by the Firecrawl API.
//
// Parameters:
// - resp: The HTTP response object.
// - body: The response body from the HTTP response.
// - action: A string describing the action being performed.
//
// Returns:
// - error: An error describing the failure reason.
func (app *FirecrawlApp) handleError(statusCode int, body []byte, action string) error {
var errorData map[string]any
err := json.Unmarshal(body, &errorData)
if err != nil {
return fmt.Errorf("failed to parse error response: %v", err)
}
errorMessage, _ := errorData["error"].(string)
if errorMessage == "" {
errorMessage = "No additional error details provided."
}
var message string
switch statusCode {
case 402:
message = fmt.Sprintf("Payment Required: Failed to %s. %s", action, errorMessage)
case 408:
message = fmt.Sprintf("Request Timeout: Failed to %s as the request timed out. %s", action, errorMessage)
case 409:
message = fmt.Sprintf("Conflict: Failed to %s due to a conflict. %s", action, errorMessage)
case 500:
message = fmt.Sprintf("Internal Server Error: Failed to %s. %s", action, errorMessage)
default:
message = fmt.Sprintf("Unexpected error during %s: Status code %d. %s", action, statusCode, errorMessage)
}
return fmt.Errorf("%s", message)
}

View File

@ -0,0 +1,292 @@
package firecrawl
import (
"log"
"os"
"testing"
"time"
"github.com/google/uuid"
"github.com/joho/godotenv"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var API_URL string
var TEST_API_KEY string
func init() {
err := godotenv.Load("../.env")
if err != nil {
log.Fatalf("Error loading .env file: %v", err)
}
API_URL = os.Getenv("API_URL")
TEST_API_KEY = os.Getenv("TEST_API_KEY")
}
func TestNoAPIKey(t *testing.T) {
_, err := NewFirecrawlApp("", API_URL)
assert.Error(t, err)
assert.Contains(t, err.Error(), "no API key provided")
}
func TestScrapeURLInvalidAPIKey(t *testing.T) {
app, err := NewFirecrawlApp("invalid_api_key", API_URL)
require.NoError(t, err)
_, err = app.ScrapeURL("https://firecrawl.dev", nil)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token")
}
func TestBlocklistedURL(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
_, err = app.ScrapeURL("https://facebook.com/fake-test", nil)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
}
func TestSuccessfulResponseWithValidPreviewToken(t *testing.T) {
app, err := NewFirecrawlApp("this_is_just_a_preview_token", API_URL)
require.NoError(t, err)
response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.Content, "_Roast_")
}
func TestScrapeURLE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.Content, "_Roast_")
assert.NotEqual(t, response.Markdown, "")
assert.NotNil(t, response.Metadata)
assert.Equal(t, response.HTML, "")
}
func TestSuccessfulResponseWithValidAPIKeyAndIncludeHTML(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
params := map[string]any{
"pageOptions": map[string]any{
"includeHtml": true,
},
}
response, err := app.ScrapeURL("https://roastmywebsite.ai", params)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.Content, "_Roast_")
assert.Contains(t, response.Markdown, "_Roast_")
assert.Contains(t, response.HTML, "<h1")
assert.NotNil(t, response.Metadata)
}
func TestSuccessfulResponseForValidScrapeWithPDFFile(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001.pdf", nil)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
assert.NotNil(t, response.Metadata)
}
func TestSuccessfulResponseForValidScrapeWithPDFFileWithoutExplicitExtension(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001", nil)
time.Sleep(6 * time.Second) // wait for 6 seconds
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
assert.NotNil(t, response.Metadata)
}
func TestCrawlURLInvalidAPIKey(t *testing.T) {
app, err := NewFirecrawlApp("invalid_api_key", API_URL)
require.NoError(t, err)
_, err = app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token")
}
func TestShouldReturnErrorForBlocklistedURL(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
_, err = app.CrawlURL("https://twitter.com/fake-test", nil, false, 2, "")
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
}
func TestCrawlURLWaitForCompletionE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
params := map[string]any{
"crawlerOptions": map[string]any{
"excludes": []string{"blog/*"},
},
}
response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, "")
require.NoError(t, err)
assert.NotNil(t, response)
data, ok := response.([]*FirecrawlDocument)
assert.True(t, ok)
assert.Greater(t, len(data), 0)
assert.Contains(t, data[0].Content, "_Roast_")
}
func TestCrawlURLWithIdempotencyKeyE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
uniqueIdempotencyKey := uuid.New().String()
params := map[string]any{
"crawlerOptions": map[string]any{
"excludes": []string{"blog/*"},
},
}
response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, uniqueIdempotencyKey)
require.NoError(t, err)
assert.NotNil(t, response)
data, ok := response.([]*FirecrawlDocument)
assert.True(t, ok)
assert.Greater(t, len(data), 0)
assert.Contains(t, data[0].Content, "_Roast_")
_, err = app.CrawlURL("https://firecrawl.dev", params, true, 2, uniqueIdempotencyKey)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used")
}
func TestCheckCrawlStatusE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
params := map[string]any{
"crawlerOptions": map[string]any{
"excludes": []string{"blog/*"},
},
}
response, err := app.CrawlURL("https://firecrawl.dev", params, false, 2, "")
require.NoError(t, err)
assert.NotNil(t, response)
jobID, ok := response.(string)
assert.True(t, ok)
assert.NotEqual(t, "", jobID)
time.Sleep(30 * time.Second) // give the crawl time to finish before checking its status
statusResponse, err := app.CheckCrawlStatus(jobID)
require.NoError(t, err)
assert.NotNil(t, statusResponse)
assert.Equal(t, "completed", statusResponse.Status)
assert.Greater(t, len(statusResponse.Data), 0)
}
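The fixed 30-second sleep above keeps the test simple but is brittle for larger crawls. A minimal polling sketch instead, assuming the *FirecrawlApp and *JobStatusResponse type names (inferred from the tests, not shown in this diff) and illustrative timeout/interval values; it needs the fmt and time imports:

func waitForCrawl(app *FirecrawlApp, jobID string) (*JobStatusResponse, error) {
	// Poll the job status instead of sleeping for a fixed interval.
	deadline := time.Now().Add(2 * time.Minute) // illustrative timeout
	for time.Now().Before(deadline) {
		status, err := app.CheckCrawlStatus(jobID)
		if err != nil {
			return nil, err
		}
		if status.Status == "completed" {
			return status, nil
		}
		time.Sleep(5 * time.Second) // illustrative poll interval
	}
	return nil, fmt.Errorf("crawl job %s did not complete before the deadline", jobID)
}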
func TestSearchE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
response, err := app.Search("test query", nil)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Greater(t, len(response), 2)
assert.NotEqual(t, "", response[0].Content)
}
func TestSearchInvalidAPIKey(t *testing.T) {
app, err := NewFirecrawlApp("invalid_api_key", API_URL)
require.NoError(t, err)
_, err = app.Search("test query", nil)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during search: Status code 401. Unauthorized: Invalid token")
}
func TestLLMExtraction(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
params := map[string]any{
"extractorOptions": ExtractorOptions{
Mode: "llm-extraction",
ExtractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
ExtractionSchema: map[string]any{
"type": "object",
"properties": map[string]any{
"company_mission": map[string]string{"type": "string"},
"supports_sso": map[string]string{"type": "boolean"},
"is_open_source": map[string]string{"type": "boolean"},
},
"required": []string{"company_mission", "supports_sso", "is_open_source"},
},
},
}
response, err := app.ScrapeURL("https://mendable.ai", params)
require.NoError(t, err)
assert.NotNil(t, response)
assert.Contains(t, response.LLMExtraction, "company_mission")
assert.IsType(t, true, response.LLMExtraction["supports_sso"])
assert.IsType(t, true, response.LLMExtraction["is_open_source"])
}
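Outside the assertions, the extracted values are untyped. A short sketch of reading them safely, assuming ScrapeURL returns the same *FirecrawlDocument the crawl tests assert on and that LLMExtraction is a map[string]any (both inferred from the tests above); it needs the fmt and log imports:

func printExtraction(doc *FirecrawlDocument) {
	// The keys mirror the requested JSON schema; the type assertions
	// assume the API honored the schema's declared types.
	mission, ok := doc.LLMExtraction["company_mission"].(string)
	if !ok {
		log.Println("company_mission missing or not a string")
		return
	}
	supportsSSO, _ := doc.LLMExtraction["supports_sso"].(bool)
	isOpenSource, _ := doc.LLMExtraction["is_open_source"].(bool)
	fmt.Printf("mission: %q, SSO: %v, open source: %v\n", mission, supportsSSO, isOpenSource)
}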
func TestCancelCrawlJobInvalidAPIKey(t *testing.T) {
app, err := NewFirecrawlApp("invalid_api_key", API_URL)
require.NoError(t, err)
_, err = app.CancelCrawlJob("test query") // the job ID is irrelevant here; the invalid API key fails first
assert.Error(t, err)
assert.Contains(t, err.Error(), "Unexpected error during cancel crawl job: Status code 401. Unauthorized: Invalid token")
}
func TestCancelNonExistingCrawlJob(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
jobID := uuid.New().String()
_, err = app.CancelCrawlJob(jobID)
assert.Error(t, err)
assert.Contains(t, err.Error(), "Job not found")
}
func TestCancelCrawlJobE2E(t *testing.T) {
app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
require.NoError(t, err)
response, err := app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
require.NoError(t, err)
assert.NotNil(t, response)
jobID, ok := response.(string)
assert.True(t, ok)
assert.NotEqual(t, "", jobID)
status, err := app.CancelCrawlJob(jobID)
require.NoError(t, err)
assert.Equal(t, "cancelled", status)
}
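Read together, these tests double as usage documentation for the SDK. A minimal standalone sketch of the same scrape flow; the import path is inferred from the module path in the go.mod below and the environment variable names follow the test setup, so treat both as assumptions:

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/mendableai/firecrawl/apps/go-sdk/firecrawl" // assumed import path
)

func main() {
	// Credentials come from the environment, as in the tests above.
	app, err := firecrawl.NewFirecrawlApp(os.Getenv("TEST_API_KEY"), os.Getenv("API_URL"))
	if err != nil {
		log.Fatalf("failed to initialize FirecrawlApp: %v", err)
	}

	// A nil params map uses the API defaults, as in TestScrapeURLE2E.
	doc, err := app.ScrapeURL("https://firecrawl.dev", nil)
	if err != nil {
		log.Fatalf("scrape failed: %v", err)
	}
	fmt.Println(doc.Markdown)
}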

15
apps/go-sdk/go.mod Normal file
View File

@ -0,0 +1,15 @@
module github.com/mendableai/firecrawl/apps/go-sdk
go 1.22.5
require (
github.com/google/uuid v1.6.0
github.com/joho/godotenv v1.5.1
github.com/stretchr/testify v1.9.0
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

14
apps/go-sdk/go.sum Normal file
View File

@ -0,0 +1,14 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -36,9 +36,9 @@ class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the scrape request.
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
*/
scrapeUrl(url, params = null) {
var _a;
return __awaiter(this, void 0, void 0, function* () {
scrapeUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null) {
var _a;
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
@ -79,8 +79,8 @@ class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
search(query, params = null) {
return __awaiter(this, void 0, void 0, function* () {
search(query_1) {
return __awaiter(this, arguments, void 0, function* (query, params = null) {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
@ -119,8 +119,8 @@ class FirecrawlApp {
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
return __awaiter(this, void 0, void 0, function* () {
crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {

View File

@ -31,9 +31,9 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the scrape request.
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
*/
scrapeUrl(url, params = null) {
var _a;
return __awaiter(this, void 0, void 0, function* () {
scrapeUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null) {
var _a;
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
@ -74,8 +74,8 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
search(query, params = null) {
return __awaiter(this, void 0, void 0, function* () {
search(query_1) {
return __awaiter(this, arguments, void 0, function* (query, params = null) {
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
@ -114,8 +114,8 @@ export default class FirecrawlApp {
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
return __awaiter(this, void 0, void 0, function* () {
crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {

View File

@ -1,12 +1,12 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.29",
"version": "0.0.36",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
"version": "0.0.29",
"version": "0.0.36",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",

View File

@ -1,13 +1,19 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.34",
"version": "0.0.36",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/cjs/index.js",
"types": "types/index.d.ts",
"type": "module",
"exports": {
"require": "./build/cjs/index.js",
"import": "./build/esm/index.js"
"require": {
"types": "./types/index.d.ts",
"default": "./build/cjs/index.js"
},
"import": {
"types": "./types/index.d.ts",
"default": "./build/esm/index.js"
}
},
"scripts": {
"build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",

View File

@ -29,6 +29,7 @@ x-common-service: &common-service
- SCRAPING_BEE_API_KEY=${SCRAPING_BEE_API_KEY}
- HOST=${HOST:-0.0.0.0}
- SELF_HOSTED_WEBHOOK_URL=${SELF_HOSTED_WEBHOOK_URL}
- LOGGING_LEVEL=${LOGGING_LEVEL}
extra_hosts:
- "host.docker.internal:host-gateway"