diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts
index a95c85a6..3ebee976 100644
--- a/apps/api/src/controllers/v0/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@@ -171,7 +171,8 @@ export async function crawlController(req: Request, res: Response) {
         url,
         mode: "single_urls",
         crawlerOptions: crawlerOptions,
-        team_id: team_id,
+        team_id,
+        plan,
         pageOptions: pageOptions,
         origin: req.body.origin ?? defaultOrigin,
         crawl_id: id,
@@ -211,7 +212,8 @@ export async function crawlController(req: Request, res: Response) {
         url,
         mode: "single_urls",
         crawlerOptions: crawlerOptions,
-        team_id: team_id,
+        team_id,
+        plan,
         pageOptions: pageOptions,
         origin: req.body.origin ?? defaultOrigin,
         crawl_id: id,
diff --git a/apps/api/src/controllers/v0/crawlPreview.ts b/apps/api/src/controllers/v0/crawlPreview.ts
index f8706867..bceb1df9 100644
--- a/apps/api/src/controllers/v0/crawlPreview.ts
+++ b/apps/api/src/controllers/v0/crawlPreview.ts
@@ -107,7 +107,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
         url,
         mode: "single_urls",
         crawlerOptions: crawlerOptions,
-        team_id: team_id,
+        team_id,
+        plan,
         pageOptions: pageOptions,
         origin: "website-preview",
         crawl_id: id,
@@ -121,7 +122,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
         url,
         mode: "single_urls",
         crawlerOptions: crawlerOptions,
-        team_id: team_id,
+        team_id,
+        plan,
         pageOptions: pageOptions,
         origin: "website-preview",
         crawl_id: id,
diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts
index db304fed..cfb1366c 100644
--- a/apps/api/src/controllers/v0/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@@ -60,6 +60,7 @@ export async function scrapeHelper(
       mode: "single_urls",
       crawlerOptions,
       team_id,
+      plan,
       pageOptions,
       extractorOptions,
       origin: req.body.origin ?? defaultOrigin,
diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts
index e0883fa8..4efe279a 100644
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@@ -106,6 +106,7 @@ export async function crawlController(
           url,
           mode: "single_urls",
           team_id: req.auth.team_id,
+          plan: req.auth.plan,
           crawlerOptions,
           pageOptions,
           origin: "api",
@@ -138,6 +139,7 @@
       mode: "single_urls",
       crawlerOptions: crawlerOptions,
       team_id: req.auth.team_id,
+      plan: req.auth.plan,
       pageOptions: pageOptions,
       origin: "api",
       crawl_id: id,
diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index 41974917..75384876 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -44,6 +44,7 @@ export async function scrapeController(
     mode: "single_urls",
     crawlerOptions: {},
     team_id: req.auth.team_id,
+    plan: req.auth.plan,
     pageOptions,
     extractorOptions,
     origin: req.body.origin,
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 7468a050..050d672d 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -34,9 +34,10 @@ import {
   deleteJobPriority,
   getJobPriority,
 } from "../../src/lib/job-priority";
-import { PlanType } from "../types";
+import { PlanType, RateLimiterMode } from "../types";
 import { getJobs } from "../../src/controllers/v1/crawl-status";
 import { configDotenv } from "dotenv";
+import { getRateLimiterPoints } from "./rate-limiter";
 configDotenv();
 
 if (process.env.ENV === "production") {
@@ -131,9 +132,9 @@ const workerFun = async (
     if (job) {
       const concurrencyLimiterKey = "concurrency-limiter:" + job.data?.team_id;
 
-      if (job.data && job.data.team_id) {
+      if (job.data && job.data.team_id && job.data.plan) {
         const concurrencyLimiterThrottledKey = "concurrency-limiter:" + job.data.team_id + ":throttled";
-        const concurrencyLimit = 10; // TODO: determine based on price id
+        const concurrencyLimit = getRateLimiterPoints(RateLimiterMode.Scrape, undefined, job.data.plan);
         const now = Date.now();
         const stalledJobTimeoutMs = 2 * 60 * 1000;
         const throttledJobTimeoutMs = 10 * 60 * 1000;
@@ -382,6 +383,7 @@ async function processJob(job: Job, token: string) {
           mode: "single_urls",
           crawlerOptions: sc.crawlerOptions,
           team_id: sc.team_id,
+          plan: job.data.plan,
           pageOptions: sc.pageOptions,
           origin: job.data.origin,
           crawl_id: job.data.crawl_id,
diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts
index 51a0ecfa..e0fc5646 100644
--- a/apps/api/src/services/rate-limiter.ts
+++ b/apps/api/src/services/rate-limiter.ts
@@ -123,14 +123,18 @@
 const testSuiteTokens = ["a01ccae", "6254cf9", "0f96e673", "23befa1b", "69141c4"];
 
 const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6"];
 
-export function getRateLimiter(
+function makePlanKey(plan?: string) {
+  return plan ? plan.replace("-", "") : "default"; // "default"
+}
+
+export function getRateLimiterPoints(
   mode: RateLimiterMode,
-  token: string,
+  token?: string,
   plan?: string,
   teamId?: string
 ) {
-  if (testSuiteTokens.some(testToken => token.includes(testToken))) {
+  if (token && testSuiteTokens.some(testToken => token.includes(testToken))) {
     return testSuiteRateLimiter;
   }
 
@@ -146,9 +150,17 @@
 
   if (!rateLimitConfig) return serverRateLimiter;
 
-  const planKey = plan ? plan.replace("-", "") : "default"; // "default"
   const points =
-    rateLimitConfig[planKey] || rateLimitConfig.default || rateLimitConfig; // 5
+    rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default || rateLimitConfig; // 5
 
-  return createRateLimiter(`${mode}-${planKey}`, points);
+  return points;
+}
+
+export function getRateLimiter(
+  mode: RateLimiterMode,
+  token?: string,
+  plan?: string,
+  teamId?: string
+) {
+  return createRateLimiter(`${mode}-${makePlanKey(plan)}`, getRateLimiterPoints(mode, token, plan, teamId));
 }
diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts
index 3795ce1e..a03176da 100644
--- a/apps/api/src/types.ts
+++ b/apps/api/src/types.ts
@@ -28,6 +28,7 @@ export interface WebScraperOptions {
   pageOptions: any;
   extractorOptions?: any;
   team_id: string;
+  plan: string;
  origin?: string;
   crawl_id?: string;
   sitemapped?: boolean;
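
For reference, a minimal usage sketch (not part of the patch) of how the split helpers compose after this change: controllers thread plan into the job payload, and the worker sizes its concurrency cap from the same per-plan point budget that backs the API rate limiter. The file name, plan string, and token value below are hypothetical.

// rate-limiter-usage.example.ts (hypothetical file, for illustration only)
import { RateLimiterMode } from "../types";
import { getRateLimiter, getRateLimiterPoints } from "./rate-limiter";

// Hypothetical plan string; in the patch it travels on job.data.plan,
// populated from req.auth.plan by the v1 controllers.
const plan = "standard";

// Worker side: the concurrency cap now follows the plan's Scrape point
// budget instead of the old hard-coded 10. No token is passed, so the
// test-suite bypass inside getRateLimiterPoints is skipped.
const concurrencyLimit = getRateLimiterPoints(
  RateLimiterMode.Scrape,
  undefined,
  plan
);

// API side: getRateLimiter keeps its old behavior, now layered on top of
// getRateLimiterPoints; it returns a limiter keyed by mode and the
// normalized plan key from makePlanKey.
const limiter = getRateLimiter(RateLimiterMode.Scrape, "example-token", plan);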