feat(concurrency-limit): set limit based on plan

This commit is contained in:
Gergő Móricz 2024-09-28 00:19:46 +02:00
parent 29815e084b
commit 3621e191bd
8 changed files with 36 additions and 13 deletions

View File

@ -171,7 +171,8 @@ export async function crawlController(req: Request, res: Response) {
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
team_id,
plan,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
@ -211,7 +212,8 @@ export async function crawlController(req: Request, res: Response) {
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
team_id,
plan,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,

View File

@ -107,7 +107,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
team_id,
plan,
pageOptions: pageOptions,
origin: "website-preview",
crawl_id: id,
@ -121,7 +122,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
team_id,
plan,
pageOptions: pageOptions,
origin: "website-preview",
crawl_id: id,

View File

@ -60,6 +60,7 @@ export async function scrapeHelper(
mode: "single_urls",
crawlerOptions,
team_id,
plan,
pageOptions,
extractorOptions,
origin: req.body.origin ?? defaultOrigin,

View File

@ -106,6 +106,7 @@ export async function crawlController(
url,
mode: "single_urls",
team_id: req.auth.team_id,
plan: req.auth.plan,
crawlerOptions,
pageOptions,
origin: "api",
@ -138,6 +139,7 @@ export async function crawlController(
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: req.auth.team_id,
plan: req.auth.plan,
pageOptions: pageOptions,
origin: "api",
crawl_id: id,

View File

@ -44,6 +44,7 @@ export async function scrapeController(
mode: "single_urls",
crawlerOptions: {},
team_id: req.auth.team_id,
plan: req.auth.plan,
pageOptions,
extractorOptions,
origin: req.body.origin,

View File

@ -34,9 +34,10 @@ import {
deleteJobPriority,
getJobPriority,
} from "../../src/lib/job-priority";
import { PlanType } from "../types";
import { PlanType, RateLimiterMode } from "../types";
import { getJobs } from "../../src/controllers/v1/crawl-status";
import { configDotenv } from "dotenv";
import { getRateLimiterPoints } from "./rate-limiter";
configDotenv();
if (process.env.ENV === "production") {
@ -131,9 +132,9 @@ const workerFun = async (
if (job) {
const concurrencyLimiterKey = "concurrency-limiter:" + job.data?.team_id;
if (job.data && job.data.team_id) {
if (job.data && job.data.team_id && job.data.plan) {
const concurrencyLimiterThrottledKey = "concurrency-limiter:" + job.data.team_id + ":throttled";
const concurrencyLimit = 10; // TODO: determine based on price id
const concurrencyLimit = getRateLimiterPoints(RateLimiterMode.Scrape, undefined, job.data.plan);
const now = Date.now();
const stalledJobTimeoutMs = 2 * 60 * 1000;
const throttledJobTimeoutMs = 10 * 60 * 1000;
@ -382,6 +383,7 @@ async function processJob(job: Job, token: string) {
mode: "single_urls",
crawlerOptions: sc.crawlerOptions,
team_id: sc.team_id,
plan: job.data.plan,
pageOptions: sc.pageOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,

View File

@ -123,14 +123,18 @@ const testSuiteTokens = ["a01ccae", "6254cf9", "0f96e673", "23befa1b", "69141c4"
const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6"];
export function getRateLimiter(
/**
 * Turns a plan name into the key used to index the rate-limit configuration.
 *
 * A missing or empty plan maps to "default". Otherwise the first "-" in the
 * name is removed (later hyphens, if any, are left untouched).
 */
function makePlanKey(plan?: string) {
  if (!plan) {
    return "default";
  }
  // String pattern => only the first occurrence is replaced.
  return plan.replace("-", "");
}
export function getRateLimiterPoints(
mode: RateLimiterMode,
token: string,
token?: string,
plan?: string,
teamId?: string
) {
if (testSuiteTokens.some(testToken => token.includes(testToken))) {
if (token && testSuiteTokens.some(testToken => token.includes(testToken))) {
return testSuiteRateLimiter;
}
@ -146,9 +150,17 @@ export function getRateLimiter(
if (!rateLimitConfig) return serverRateLimiter;
const planKey = plan ? plan.replace("-", "") : "default"; // "default"
const points =
rateLimitConfig[planKey] || rateLimitConfig.default || rateLimitConfig; // 5
rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default || rateLimitConfig; // 5
return createRateLimiter(`${mode}-${planKey}`, points);
return points;
}
/**
 * Creates the rate limiter for the given mode and plan.
 *
 * The limiter is named `<mode>-<planKey>` (planKey comes from makePlanKey) and
 * is given whatever getRateLimiterPoints returns for the same arguments.
 *
 * NOTE(review): getRateLimiterPoints appears to return ready-made limiter
 * objects (testSuiteRateLimiter / serverRateLimiter) on some paths rather than
 * a number of points — confirm createRateLimiter copes with that.
 *
 * @param mode   Rate limiter mode the limiter applies to.
 * @param token  Optional token, forwarded to getRateLimiterPoints.
 * @param plan   Optional plan name; selects the plan key and the points.
 * @param teamId Optional team id, forwarded to getRateLimiterPoints.
 * @returns The value produced by createRateLimiter.
 */
export function getRateLimiter(
mode: RateLimiterMode,
token?: string,
plan?: string,
teamId?: string
) {
const limiterKey = `${mode}-${makePlanKey(plan)}`;
const points = getRateLimiterPoints(mode, token, plan, teamId);
return createRateLimiter(limiterKey, points);
}

View File

@ -28,6 +28,7 @@ export interface WebScraperOptions {
pageOptions: any;
extractorOptions?: any;
team_id: string;
plan: string;
origin?: string;
crawl_id?: string;
sitemapped?: boolean;