ACUC: Dynamic Limits (FIR-1641) (#1434)

* extend acuc definition

* kill plan

* stuff

* stupid tests

* feat: better acuc

* feat(acuc): mock ACUC when not using db auth
This commit is contained in:
Gergő Móricz 2025-04-10 18:49:23 +02:00 committed by GitHub
parent f2865f6699
commit 6a10f0689d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
43 changed files with 611 additions and 985 deletions

View File

@ -9,8 +9,6 @@ import {
getConcurrencyQueueJobsCount,
ConcurrencyLimitedJob,
} from "../lib/concurrency-limit";
import { CONCURRENCY_LIMIT, getConcurrencyLimitMax } from "../services/rate-limiter";
import { PlanType } from "../types";
// Mock Redis client
jest.mock("../services/queue-service", () => ({
@ -174,34 +172,6 @@ describe("Concurrency Limit", () => {
});
});
describe("getConcurrencyLimitMax", () => {
it("should return correct limit for free plan", () => {
const result = getConcurrencyLimitMax("free");
expect(result).toBe(2);
});
it("should return correct limit for standard plan", () => {
const result = getConcurrencyLimitMax("standard");
expect(result).toBe(CONCURRENCY_LIMIT.standard);
});
it("should return correct limit for scale plan", () => {
const result = getConcurrencyLimitMax("scale");
expect(result).toBe(CONCURRENCY_LIMIT.scale);
});
it("should return default limit for unknown plan", () => {
const result = getConcurrencyLimitMax("unknown" as PlanType);
expect(result).toBe(10);
});
it("should handle special team IDs", () => {
process.env.DEV_B_TEAM_ID = "dev-b-team";
const result = getConcurrencyLimitMax("free", "dev-b-team");
expect(result).toBe(120);
});
});
describe("Integration Scenarios", () => {
it("should handle complete job lifecycle", async () => {
const mockJob: ConcurrencyLimitedJob = {

View File

@ -20,7 +20,6 @@ describe("Deep Research Redis Operations", () => {
const mockResearch: StoredDeepResearch = {
id: "test-id",
team_id: "team-1",
plan: "pro",
createdAt: Date.now(),
status: "processing",
currentDepth: 0,

View File

@ -6,8 +6,8 @@ import {
takeConcurrencyLimitedJob,
removeConcurrencyLimitActiveJob,
} from "../lib/concurrency-limit";
import { getConcurrencyLimitMax } from "../services/rate-limiter";
import { WebScraperOptions, PlanType } from "../types";
import { WebScraperOptions } from "../types";
import { getACUCTeam } from "../controllers/auth";
// Mock all the dependencies
const mockAdd = jest.fn();
@ -32,7 +32,6 @@ jest.mock("uuid", () => ({
describe("Queue Concurrency Integration", () => {
const mockTeamId = "test-team-id";
const mockPlan = "standard" as PlanType;
const mockNow = Date.now();
const defaultScrapeOptions = {
@ -77,7 +76,6 @@ describe("Queue Concurrency Integration", () => {
url: "https://test.com",
mode: "single_urls",
team_id: mockTeamId,
plan: mockPlan,
scrapeOptions: defaultScrapeOptions,
crawlerOptions: null,
};
@ -104,8 +102,10 @@ describe("Queue Concurrency Integration", () => {
it("should add job to concurrency queue when at concurrency limit", async () => {
// Mock current active jobs to be at limit
const maxConcurrency = getConcurrencyLimitMax(mockPlan);
const activeJobs = Array(maxConcurrency).fill("active-job");
(getACUCTeam as jest.Mock).mockResolvedValue({
concurrency: 15,
} as any);
const activeJobs = Array(15).fill("active-job");
(redisConnection.zrangebyscore as jest.Mock).mockResolvedValue(
activeJobs,
);
@ -136,7 +136,6 @@ describe("Queue Concurrency Integration", () => {
url: `https://test${i}.com`,
mode: "single_urls",
team_id: mockTeamId,
plan: mockPlan,
scrapeOptions: defaultScrapeOptions,
} as WebScraperOptions,
opts: {
@ -146,7 +145,10 @@ describe("Queue Concurrency Integration", () => {
}));
it("should handle batch jobs respecting concurrency limits", async () => {
const maxConcurrency = getConcurrencyLimitMax(mockPlan);
const maxConcurrency = 15;
(getACUCTeam as jest.Mock).mockResolvedValue({
concurrency: maxConcurrency,
} as any);
const totalJobs = maxConcurrency + 5; // Some jobs should go to queue
const mockJobs = createMockJobs(totalJobs);
@ -180,7 +182,6 @@ describe("Queue Concurrency Integration", () => {
id: "test-job",
data: {
team_id: mockTeamId,
plan: mockPlan,
},
};
@ -218,7 +219,6 @@ describe("Queue Concurrency Integration", () => {
id: "failing-job",
data: {
team_id: mockTeamId,
plan: mockPlan,
},
};

View File

@ -53,7 +53,7 @@ describe("Crawl tests", () => {
// expect(page.metadata.url ?? page.metadata.sourceURL).toMatch(/^https:\/\/(www\.)?firecrawl\.dev\/blog/);
// }
// }
// }, 120000);
// }, 300000);
// TEMP: Flaky
// it.concurrent("discovers URLs properly when maxDiscoveryDepth is provided", async () => {
@ -71,5 +71,5 @@ describe("Crawl tests", () => {
// expect(page.metadata.url ?? page.metadata.sourceURL).not.toMatch(/^https:\/\/(www\.)?firecrawl\.dev\/blog\/.+$/);
// }
// }
// }, 120000);
// }, 300000);
});

View File

@ -3,7 +3,6 @@ import { getRateLimiter, isTestSuiteToken } from "../services/rate-limiter";
import {
AuthResponse,
NotificationType,
PlanType,
RateLimiterMode,
} from "../types";
import { supabase_rr_service, supabase_service } from "../services/supabase";
@ -16,7 +15,7 @@ import { deleteKey, getValue } from "../services/redis";
import { setValue } from "../services/redis";
import { validate } from "uuid";
import * as Sentry from "@sentry/node";
import { AuthCreditUsageChunk } from "./v1/types";
import { AuthCreditUsageChunk, AuthCreditUsageChunkFromTeam } from "./v1/types";
// const { data, error } = await supabase_service
// .from('api_keys')
// .select(`
@ -38,12 +37,13 @@ function normalizedApiIsUuid(potentialUuid: string): boolean {
export async function setCachedACUC(
api_key: string,
is_extract: boolean,
acuc:
| AuthCreditUsageChunk
| null
| ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk | null),
) {
const cacheKeyACUC = `acuc_${api_key}`;
const cacheKeyACUC = `acuc_${api_key}_${is_extract ? "extract" : "scrape"}`;
const redLockKey = `lock_${cacheKeyACUC}`;
try {
@ -72,13 +72,165 @@ export async function setCachedACUC(
}
}
const mockACUC: () => AuthCreditUsageChunk = () => ({
api_key: "bypass",
team_id: "bypass",
sub_id: "bypass",
sub_current_period_start: new Date().toISOString(),
sub_current_period_end: new Date(new Date().getTime() + 30 * 24 * 60 * 60 * 1000).toISOString(),
sub_user_id: "bypass",
price_id: "bypass",
rate_limits: {
crawl: 99999999,
scrape: 99999999,
extract: 99999999,
search: 99999999,
map: 99999999,
preview: 99999999,
crawlStatus: 99999999,
extractStatus: 99999999,
},
price_credits: 99999999,
credits_used: 0,
coupon_credits: 99999999,
adjusted_credits_used: 0,
remaining_credits: 99999999,
total_credits_sum: 99999999,
plan_priority: {
bucketLimit: 25,
planModifier: 0.1,
},
concurrency: 99999999,
is_extract: false,
});
export async function getACUC(
api_key: string,
cacheOnly = false,
useCache = true,
mode?: RateLimiterMode,
): Promise<AuthCreditUsageChunk | null> {
const cacheKeyACUC = `acuc_${api_key}_${mode}`;
let isExtract =
mode === RateLimiterMode.Extract ||
mode === RateLimiterMode.ExtractStatus;
if (process.env.USE_DB_AUTHENTICATION !== "true") {
const acuc = mockACUC();
acuc.is_extract = isExtract;
return acuc;
}
const cacheKeyACUC = `acuc_${api_key}_${isExtract ? "extract" : "scrape"}`;
// if (useCache) {
// const cachedACUC = await getValue(cacheKeyACUC);
// if (cachedACUC !== null) {
// return JSON.parse(cachedACUC);
// }
// }
// if (!cacheOnly) {
let data;
let error;
let retries = 0;
const maxRetries = 5;
while (retries < maxRetries) {
const client =
Math.random() > (2/3) ? supabase_rr_service : supabase_service;
({ data, error } = await client.rpc(
"auth_credit_usage_chunk_28",
{ input_key: api_key, i_is_extract: isExtract, tally_untallied_credits: true },
{ get: true },
));
if (!error) {
break;
}
logger.warn(
`Failed to retrieve authentication and credit usage data after ${retries}, trying again...`,
{ error }
);
retries++;
if (retries === maxRetries) {
throw new Error(
"Failed to retrieve authentication and credit usage data after 3 attempts: " +
JSON.stringify(error),
);
}
// Wait for a short time before retrying
await new Promise((resolve) => setTimeout(resolve, 200));
}
const chunk: AuthCreditUsageChunk | null =
data.length === 0 ? null : data[0].team_id === null ? null : data[0];
// NOTE: Should we cache null chunks? - mogery
// if (chunk !== null && useCache) {
// setCachedACUC(api_key, isExtract, chunk);
// }
return chunk ? { ...chunk, is_extract: isExtract } : null;
// } else {
// return null;
// }
}
export async function setCachedACUCTeam(
team_id: string,
is_extract: boolean,
acuc:
| AuthCreditUsageChunkFromTeam
| null
| ((acuc: AuthCreditUsageChunkFromTeam) => AuthCreditUsageChunkFromTeam | null),
) {
const cacheKeyACUC = `acuc_team_${team_id}_${is_extract ? "extract" : "scrape"}`;
const redLockKey = `lock_${cacheKeyACUC}`;
try {
await redlock.using([redLockKey], 10000, {}, async (signal) => {
if (typeof acuc === "function") {
acuc = acuc(JSON.parse((await getValue(cacheKeyACUC)) ?? "null"));
if (acuc === null) {
if (signal.aborted) {
throw signal.error;
}
return;
}
}
if (signal.aborted) {
throw signal.error;
}
// Cache for 1 hour. - mogery
await setValue(cacheKeyACUC, JSON.stringify(acuc), 3600, true);
});
} catch (error) {
logger.error(`Error updating cached ACUC ${cacheKeyACUC}: ${error}`);
}
}
export async function getACUCTeam(
team_id: string,
cacheOnly = false,
useCache = true,
mode?: RateLimiterMode,
): Promise<AuthCreditUsageChunkFromTeam | null> {
let isExtract =
mode === RateLimiterMode.Extract ||
mode === RateLimiterMode.ExtractStatus;
if (process.env.USE_DB_AUTHENTICATION !== "true") {
const acuc = mockACUC();
acuc.is_extract = isExtract;
return acuc;
}
const cacheKeyACUC = `acuc_team_${team_id}_${isExtract ? "extract" : "scrape"}`;
// if (useCache) {
// const cachedACUC = await getValue(cacheKeyACUC);
@ -93,15 +245,12 @@ export async function getACUC(
let retries = 0;
const maxRetries = 5;
let isExtract =
mode === RateLimiterMode.Extract ||
mode === RateLimiterMode.ExtractStatus;
while (retries < maxRetries) {
const client =
Math.random() > (2/3) ? supabase_rr_service : supabase_service;
({ data, error } = await client.rpc(
"auth_credit_usage_chunk_27_tally",
{ input_key: api_key, i_is_extract: isExtract, tally_untallied_credits: true },
"auth_credit_usage_chunk_28_from_team",
{ input_team: team_id, i_is_extract: isExtract, tally_untallied_credits: true },
{ get: true },
));
@ -141,10 +290,10 @@ export async function getACUC(
export async function clearACUC(api_key: string): Promise<void> {
// Delete cache for all rate limiter modes
const modes = Object.values(RateLimiterMode);
const modes = [true, false];
await Promise.all(
modes.map(async (mode) => {
const cacheKey = `acuc_${api_key}_${mode}`;
const cacheKey = `acuc_${api_key}_${mode ? "extract" : "scrape"}`;
await deleteKey(cacheKey);
}),
);
@ -153,6 +302,20 @@ export async function clearACUC(api_key: string): Promise<void> {
await deleteKey(`acuc_${api_key}`);
}
export async function clearACUCTeam(team_id: string): Promise<void> {
// Delete cache for all rate limiter modes
const modes = [true, false];
await Promise.all(
modes.map(async (mode) => {
const cacheKey = `acuc_team_${team_id}_${mode ? "extract" : "scrape"}`;
await deleteKey(cacheKey);
}),
);
// Also clear the base cache key
await deleteKey(`acuc_team_${team_id}`);
}
export async function authenticateUser(
req,
res,
@ -192,13 +355,12 @@ export async function supaAuthenticateUser(
const iptoken = incomingIP + token;
let rateLimiter: RateLimiterRedis;
let subscriptionData: { team_id: string; plan: string } | null = null;
let subscriptionData: { team_id: string} | null = null;
let normalizedApi: string;
let teamId: string | null = null;
let priceId: string | null = null;
let chunk: AuthCreditUsageChunk | null = null;
let plan: PlanType = "free";
if (token == "this_is_just_a_preview_token") {
throw new Error(
"Unauthenticated Playground calls are temporarily disabled due to abuse. Please sign up.",
@ -213,7 +375,6 @@ export async function supaAuthenticateUser(
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
}
teamId = `preview_${iptoken}`;
plan = "free";
} else {
normalizedApi = parseApi(token);
if (!normalizedApiIsUuid(normalizedApi)) {
@ -237,65 +398,13 @@ export async function supaAuthenticateUser(
teamId = chunk.team_id;
priceId = chunk.price_id;
plan = getPlanByPriceId(priceId);
subscriptionData = {
team_id: teamId,
plan,
};
switch (mode) {
case RateLimiterMode.Crawl:
rateLimiter = getRateLimiter(
RateLimiterMode.Crawl,
token,
subscriptionData.plan,
mode ?? RateLimiterMode.Crawl,
chunk.rate_limits,
);
break;
case RateLimiterMode.Scrape:
rateLimiter = getRateLimiter(
RateLimiterMode.Scrape,
token,
subscriptionData.plan,
teamId,
);
break;
case RateLimiterMode.Search:
rateLimiter = getRateLimiter(
RateLimiterMode.Search,
token,
subscriptionData.plan,
);
break;
case RateLimiterMode.Map:
rateLimiter = getRateLimiter(
RateLimiterMode.Map,
token,
subscriptionData.plan,
);
break;
case RateLimiterMode.Extract:
rateLimiter = getRateLimiter(
RateLimiterMode.Extract,
token,
subscriptionData.plan,
);
break;
case RateLimiterMode.ExtractStatus:
rateLimiter = getRateLimiter(RateLimiterMode.ExtractStatus, token);
break;
case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break;
case RateLimiterMode.Preview:
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
break;
default:
rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token);
break;
// case RateLimiterMode.Search:
// rateLimiter = await searchRateLimiter(RateLimiterMode.Search, token);
// break;
}
}
const team_endpoint_token =
@ -307,8 +416,8 @@ export async function supaAuthenticateUser(
logger.error(`Rate limit exceeded: ${rateLimiterRes}`, {
teamId,
priceId,
plan: subscriptionData?.plan,
mode,
rateLimits: chunk?.rate_limits,
rateLimiterRes,
});
const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
@ -342,7 +451,6 @@ export async function supaAuthenticateUser(
success: true,
team_id: `preview_${iptoken}`,
chunk: null,
plan: "free",
};
// check the origin of the request and make sure its from firecrawl.dev
// const origin = req.headers.origin;
@ -356,65 +464,9 @@ export async function supaAuthenticateUser(
// return { success: false, error: "Unauthorized: Invalid token", status: 401 };
}
if (token && isTestSuiteToken(token)) {
return {
success: true,
team_id: teamId ?? undefined,
// Now we have a test suite plan
plan: "testSuite",
chunk,
};
}
return {
success: true,
team_id: teamId ?? undefined,
plan: (subscriptionData?.plan ?? "") as PlanType,
chunk,
};
}
function getPlanByPriceId(price_id: string | null): PlanType {
switch (price_id) {
case process.env.STRIPE_PRICE_ID_STARTER:
return "starter";
case process.env.STRIPE_PRICE_ID_STANDARD:
return "standard";
case process.env.STRIPE_PRICE_ID_SCALE:
return "scale";
case process.env.STRIPE_PRICE_ID_HOBBY:
case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
return "hobby";
case process.env.STRIPE_PRICE_ID_STANDARD_NEW:
case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
return "standardnew";
case process.env.STRIPE_PRICE_ID_GROWTH:
case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
case process.env.STRIPE_PRICE_ID_SCALE_2M:
return "growth";
case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY:
return "growthdouble";
case process.env.STRIPE_PRICE_ID_ETIER2C:
return "etier2c";
case process.env.STRIPE_PRICE_ID_ETIER1A_MONTHLY: //ocqh
return "etier1a";
case process.env.STRIPE_PRICE_ID_ETIER_SCALE_1_MONTHLY:
case process.env.STRIPE_PRICE_ID_ETIER_SCALE_1_YEARLY:
case process.env.STRIPE_PRICE_ID_ETIER_SCALE_1_YEARLY_FIRECRAWL:
return "etierscale1";
case process.env.STRIPE_PRICE_ID_ETIER_SCALE_2_YEARLY:
case process.env.STRIPE_PRICE_ID_ETIER_SCALE_2_MONTHLY:
return "etierscale2";
case process.env.STRIPE_PRICE_ID_EXTRACT_STARTER_MONTHLY:
case process.env.STRIPE_PRICE_ID_EXTRACT_STARTER_YEARLY:
return "extract_starter";
case process.env.STRIPE_PRICE_ID_EXTRACT_EXPLORER_MONTHLY:
case process.env.STRIPE_PRICE_ID_EXTRACT_EXPLORER_YEARLY:
return "extract_explorer";
case process.env.STRIPE_PRICE_ID_EXTRACT_PRO_MONTHLY:
case process.env.STRIPE_PRICE_ID_EXTRACT_PRO_YEARLY:
return "extract_pro";
default:
return "free";
}
}

View File

@ -1,6 +1,6 @@
import { Request, Response } from "express";
import { supabase_service } from "../../../services/supabase";
import { clearACUC } from "../../auth";
import { clearACUC, clearACUCTeam } from "../../auth";
import { logger } from "../../../lib/logger";
export async function acucCacheClearController(req: Request, res: Response) {
@ -13,6 +13,7 @@ export async function acucCacheClearController(req: Request, res: Response) {
.eq("team_id", team_id);
await Promise.all((keys.data ?? []).map((x) => clearACUC(x.key)));
await clearACUCTeam(team_id);
logger.info(`ACUC cache cleared for team ${team_id}`);
res.json({ ok: true });

View File

@ -39,7 +39,7 @@ export async function crawlController(req: Request, res: Response) {
return res.status(auth.status).json({ error: auth.error });
}
const { team_id, plan, chunk } = auth;
const { team_id, chunk } = auth;
redisConnection.sadd("teams_using_v0", team_id)
.catch(error => logger.error("Failed to add team to teams_using_v0", { error, team_id }));
@ -170,7 +170,6 @@ export async function crawlController(req: Request, res: Response) {
scrapeOptions,
internalOptions,
team_id,
plan,
createdAt: Date.now(),
};
@ -190,7 +189,6 @@ export async function crawlController(req: Request, res: Response) {
if (urls.length === 0) return;
let jobPriority = await getJobPriority({
plan,
team_id,
basePriority: 21,
});
@ -205,7 +203,6 @@ export async function crawlController(req: Request, res: Response) {
scrapeOptions,
internalOptions,
team_id,
plan,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
sitemapped: true,
@ -236,7 +233,7 @@ export async function crawlController(req: Request, res: Response) {
await lockURL(id, sc, url);
// Not needed, first one should be 15.
// const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})
// const jobPriority = await getJobPriority({team_id, basePriority: 10})
const jobId = uuidv4();
await addScrapeJob(
@ -247,7 +244,6 @@ export async function crawlController(req: Request, res: Response) {
scrapeOptions,
internalOptions,
team_id,
plan: plan!,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
},

View File

@ -31,8 +31,6 @@ export async function crawlPreviewController(req: Request, res: Response) {
return res.status(auth.status).json({ error: auth.error });
}
const { plan } = auth;
let url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
@ -108,7 +106,6 @@ export async function crawlPreviewController(req: Request, res: Response) {
scrapeOptions,
internalOptions,
team_id,
plan,
robots,
createdAt: Date.now(),
};
@ -130,7 +127,6 @@ export async function crawlPreviewController(req: Request, res: Response) {
url,
mode: "single_urls",
team_id,
plan: plan!,
crawlerOptions,
scrapeOptions,
internalOptions,
@ -153,7 +149,6 @@ export async function crawlPreviewController(req: Request, res: Response) {
url,
mode: "single_urls",
team_id,
plan: plan!,
crawlerOptions,
scrapeOptions,
internalOptions,

View File

@ -5,7 +5,7 @@ import {
checkTeamCredits,
} from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
import { RateLimiterMode } from "../../types";
import { logJob } from "../../services/logging/log_job";
import {
fromLegacyCombo,
@ -39,7 +39,6 @@ export async function scrapeHelper(
pageOptions: PageOptions,
extractorOptions: ExtractorOptions,
timeout: number,
plan?: PlanType,
): Promise<{
success: boolean;
error?: string;
@ -59,7 +58,7 @@ export async function scrapeHelper(
};
}
const jobPriority = await getJobPriority({ plan, team_id, basePriority: 10 });
const jobPriority = await getJobPriority({ team_id, basePriority: 10 });
const { scrapeOptions, internalOptions } = fromLegacyCombo(
pageOptions,
@ -76,7 +75,6 @@ export async function scrapeHelper(
team_id,
scrapeOptions,
internalOptions,
plan: plan!,
origin: req.body.origin ?? defaultOrigin,
is_scrape: true,
},
@ -180,7 +178,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(auth.status).json({ error: auth.error });
}
const { team_id, plan, chunk } = auth;
const { team_id, chunk } = auth;
redisConnection.sadd("teams_using_v0", team_id)
.catch(error => logger.error("Failed to add team to teams_using_v0", { error, team_id }));
@ -240,7 +238,6 @@ export async function scrapeController(req: Request, res: Response) {
pageOptions,
extractorOptions,
timeout,
plan,
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;

View File

@ -4,7 +4,7 @@ import {
checkTeamCredits,
} from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
import { RateLimiterMode } from "../../types";
import { logJob } from "../../services/logging/log_job";
import { PageOptions, SearchOptions } from "../../lib/entities";
import { search } from "../../search";
@ -31,7 +31,6 @@ export async function searchHelper(
crawlerOptions: any,
pageOptions: PageOptions,
searchOptions: SearchOptions,
plan: PlanType | undefined,
): Promise<{
success: boolean;
error?: string;
@ -94,7 +93,7 @@ export async function searchHelper(
return { success: true, error: "No search results found", returnCode: 200 };
}
const jobPriority = await getJobPriority({ plan, team_id, basePriority: 20 });
const jobPriority = await getJobPriority({ team_id, basePriority: 20 });
// filter out social media links
@ -163,7 +162,7 @@ export async function searchController(req: Request, res: Response) {
if (!auth.success) {
return res.status(auth.status).json({ error: auth.error });
}
const { team_id, plan, chunk } = auth;
const { team_id, chunk } = auth;
redisConnection.sadd("teams_using_v0", team_id)
.catch(error => logger.error("Failed to add team to teams_using_v0", { error, team_id }));
@ -202,7 +201,6 @@ export async function searchController(req: Request, res: Response) {
crawlerOptions,
pageOptions,
searchOptions,
plan,
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;

View File

@ -40,7 +40,6 @@ export async function batchScrapeController(
module: "api/v1",
method: "batchScrapeController",
teamId: req.auth.team_id,
plan: req.auth.plan,
});
let urls = req.body.urls;
@ -85,7 +84,6 @@ export async function batchScrapeController(
internalOptions: { disableSmartWaitCache: true, teamId: req.auth.team_id }, // NOTE: smart wait disabled for batch scrapes to ensure contentful scrape, speed does not matter
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
};
if (!req.body.appendToId) {
@ -99,7 +97,6 @@ export async function batchScrapeController(
if (urls.length > 1000) {
// set base to 21
jobPriority = await getJobPriority({
plan: req.auth.plan,
team_id: req.auth.team_id,
basePriority: 21,
});
@ -116,7 +113,6 @@ export async function batchScrapeController(
url: x,
mode: "single_urls" as const,
team_id: req.auth.team_id,
plan: req.auth.plan!,
crawlerOptions: null,
scrapeOptions,
origin: "api",

View File

@ -1,13 +1,10 @@
import { authenticateUser } from "../auth";
import {
ConcurrencyCheckParams,
ConcurrencyCheckResponse,
RequestWithAuth,
} from "./types";
import { RateLimiterMode, PlanType } from "../../types";
import { Response } from "express";
import { redisConnection } from "../../services/queue-service";
import { getConcurrencyLimitMax } from "../../services/rate-limiter";
// Basically just middleware and error wrapping
export async function concurrencyCheckController(
@ -22,14 +19,9 @@ export async function concurrencyCheckController(
Infinity,
);
const maxConcurrency = getConcurrencyLimitMax(
req.auth.plan as PlanType,
req.auth.team_id,
);
return res.status(200).json({
success: true,
concurrency: activeJobsOfTeam.length,
maxConcurrency: maxConcurrency,
maxConcurrency: req.acuc.concurrency,
});
}

View File

@ -191,9 +191,9 @@ export async function crawlStatusWSController(
});
}
const { team_id, plan } = auth;
const { team_id } = auth;
req.auth = { team_id, plan };
req.auth = { team_id };
await crawlStatusWS(ws, req);
} catch (err) {

View File

@ -25,7 +25,6 @@ export async function crawlController(
module: "api/v1",
method: "crawlController",
teamId: req.auth.team_id,
plan: req.auth.plan,
});
logger.debug("Crawl " + id + " starting", {
request: req.body,
@ -84,7 +83,6 @@ export async function crawlController(
internalOptions: { disableSmartWaitCache: true, teamId: req.auth.team_id }, // NOTE: smart wait disabled for crawls to ensure contentful scrape, speed does not matter
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
};
const crawler = crawlToCrawler(id, sc);
@ -104,7 +102,6 @@ export async function crawlController(
url: req.body.url,
mode: "kickoff" as const,
team_id: req.auth.team_id,
plan: req.auth.plan,
crawlerOptions,
scrapeOptions: sc.scrapeOptions,
internalOptions: sc.internalOptions,

View File

@ -1,6 +1,6 @@
import { Request, Response } from "express";
import { RequestWithAuth } from "./types";
import { getACUC } from "../auth";
import { getACUCTeam } from "../auth";
import { logger } from "../../lib/logger";
export async function creditUsageController(
@ -20,7 +20,7 @@ export async function creditUsageController(
}
// Otherwise fetch fresh data
const chunk = await getACUC(req.auth.team_id);
const chunk = await getACUCTeam(req.auth.team_id);
if (!chunk) {
res.status(404).json({
success: false,

View File

@ -52,7 +52,6 @@ export async function deepResearchController(
const jobData = {
request: req.body,
teamId: req.auth.team_id,
plan: req.auth.plan,
subId: req.acuc?.sub_id,
researchId,
};
@ -60,7 +59,6 @@ export async function deepResearchController(
await saveDeepResearch(researchId, {
id: researchId,
team_id: req.auth.team_id,
plan: req.auth.plan,
createdAt: Date.now(),
status: "processing",
currentDepth: 0,

View File

@ -22,7 +22,6 @@ export async function oldExtract(
const result = await performExtraction(extractId, {
request: req.body,
teamId: req.auth.team_id,
plan: req.auth.plan ?? "free",
subId: req.acuc?.sub_id ?? undefined,
});
@ -52,7 +51,6 @@ export async function extractController(
const jobData = {
request: req.body,
teamId: req.auth.team_id,
plan: req.auth.plan,
subId: req.acuc?.sub_id,
extractId,
};
@ -68,7 +66,6 @@ export async function extractController(
await saveExtract(extractId, {
id: extractId,
team_id: req.auth.team_id,
plan: req.auth.plan,
createdAt: Date.now(),
status: "processing",
showSteps: req.body.__experimental_streamSteps,

View File

@ -30,7 +30,6 @@ export async function generateLLMsTextController(
const jobData = {
request: req.body,
teamId: req.auth.team_id,
plan: req.auth.plan,
subId: req.acuc?.sub_id,
generationId,
};
@ -38,7 +37,6 @@ export async function generateLLMsTextController(
await saveGeneratedLlmsTxt(generationId, {
id: generationId,
team_id: req.auth.team_id,
plan: req.auth.plan!, // Add non-null assertion since plan is required
createdAt: Date.now(),
status: "processing",
url: req.body.url,

View File

@ -50,7 +50,6 @@ export async function getMapResults({
includeSubdomains = true,
crawlerOptions = {},
teamId,
plan,
origin,
includeMetadata = false,
allowExternalLinks,
@ -65,7 +64,6 @@ export async function getMapResults({
includeSubdomains?: boolean;
crawlerOptions?: any;
teamId: string;
plan?: string;
origin?: string;
includeMetadata?: boolean;
allowExternalLinks?: boolean;
@ -88,7 +86,6 @@ export async function getMapResults({
internalOptions: { teamId },
team_id: teamId,
createdAt: Date.now(),
plan: plan,
};
const crawler = crawlToCrawler(id, sc);
@ -322,7 +319,6 @@ export async function mapController(
crawlerOptions: req.body,
origin: req.body.origin,
teamId: req.auth.team_id,
plan: req.auth.plan,
abort: abort.signal,
mock: req.body.useMock,
filterByPath: req.body.filterByPath !== false,

View File

@ -1,6 +1,5 @@
import { Response } from "express";
import { supabaseGetJobByIdOnlyData } from "../../lib/supabase-jobs";
import { scrapeStatusRateLimiter } from "../../services/rate-limiter";
export async function scrapeStatusController(req: any, res: any) {
const allowedTeams = [

View File

@ -12,7 +12,6 @@ import { v4 as uuidv4 } from "uuid";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { logJob } from "../../services/logging/log_job";
import { getJobPriority } from "../../lib/job-priority";
import { PlanType } from "../../types";
import { getScrapeQueue } from "../../services/queue-service";
export async function scrapeController(
@ -38,7 +37,6 @@ export async function scrapeController(
const startTime = new Date().getTime();
const jobPriority = await getJobPriority({
plan: req.auth.plan as PlanType,
team_id: req.auth.team_id,
basePriority: 10,
});
@ -51,7 +49,6 @@ export async function scrapeController(
team_id: req.auth.team_id,
scrapeOptions: req.body,
internalOptions: { teamId: req.auth.team_id },
plan: req.auth.plan!,
origin: req.body.origin,
is_scrape: true,
},

View File

@ -13,7 +13,7 @@ import { v4 as uuidv4 } from "uuid";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { logJob } from "../../services/logging/log_job";
import { getJobPriority } from "../../lib/job-priority";
import { PlanType, Mode } from "../../types";
import { Mode } from "../../types";
import { getScrapeQueue } from "../../services/queue-service";
import { search } from "../../search";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
@ -25,7 +25,6 @@ export async function searchAndScrapeSearchResult(
query: string,
options: {
teamId: string;
plan: PlanType | undefined;
origin: string;
timeout: number;
scrapeOptions: ScrapeOptions;
@ -60,7 +59,6 @@ async function scrapeSearchResult(
searchResult: { url: string; title: string; description: string },
options: {
teamId: string;
plan: PlanType | undefined;
origin: string;
timeout: number;
scrapeOptions: ScrapeOptions;
@ -68,7 +66,6 @@ async function scrapeSearchResult(
): Promise<Document> {
const jobId = uuidv4();
const jobPriority = await getJobPriority({
plan: options.plan as PlanType,
team_id: options.teamId,
basePriority: 10,
});
@ -84,7 +81,6 @@ async function scrapeSearchResult(
team_id: options.teamId,
scrapeOptions: options.scrapeOptions,
internalOptions: { teamId: options.teamId },
plan: options.plan || "free",
origin: options.origin,
is_scrape: true,
},
@ -190,7 +186,6 @@ export async function searchController(
const scrapePromises = searchResults.map((result) =>
scrapeSearchResult(result, {
teamId: req.auth.team_id,
plan: req.auth.plan,
origin: req.body.origin,
timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions,

View File

@ -1,6 +1,6 @@
import { Request, Response } from "express";
import { RequestWithAuth } from "./types";
import { getACUC } from "../auth";
import { getACUC, getACUCTeam } from "../auth";
import { logger } from "../../lib/logger";
import { RateLimiterMode } from "../../types";
@ -21,7 +21,7 @@ export async function tokenUsageController(
}
// Otherwise fetch fresh data
const chunk = await getACUC(req.auth.team_id, false, true, RateLimiterMode.Extract);
const chunk = await getACUCTeam(req.auth.team_id, false, true, RateLimiterMode.Extract);
if (!chunk) {
res.status(404).json({
success: false,

View File

@ -2,7 +2,6 @@ import { Request, Response } from "express";
import { z } from "zod";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
import { PlanType } from "../../types";
import { countries } from "../../lib/validate-country";
import {
ExtractorOptions,
@ -729,7 +728,6 @@ export type CrawlErrorsResponse =
type AuthObject = {
team_id: string;
plan: PlanType | undefined;
};
type Account = {
@ -742,18 +740,36 @@ export type AuthCreditUsageChunk = {
sub_id: string | null;
sub_current_period_start: string | null;
sub_current_period_end: string | null;
sub_user_id: string | null;
price_id: string | null;
price_credits: number; // credit limit with assoicated price, or free_credits (500) if free plan
credits_used: number;
coupon_credits: number; // do not rely on this number to be up to date after calling a billTeam
coupons: any[];
adjusted_credits_used: number; // credits this period minus coupons used
remaining_credits: number;
sub_user_id: string | null;
total_credits_sum: number;
plan_priority: {
bucketLimit: number;
planModifier: number;
};
rate_limits: {
crawl: number;
scrape: number;
search: number;
map: number;
extract: number;
preview: number;
crawlStatus: number;
extractStatus: number;
};
concurrency: number;
// appended on JS-side
is_extract?: boolean;
};
export type AuthCreditUsageChunkFromTeam = Omit<AuthCreditUsageChunk, "api_key">;
export interface RequestWithMaybeACUC<
ReqParams = {},
ReqBody = undefined,

View File

@ -4,7 +4,7 @@ import {
deleteJobPriority,
} from "../job-priority";
import { redisConnection } from "../../services/queue-service";
import { PlanType } from "../../types";
import { } from "../../types";
jest.mock("../../services/queue-service", () => ({
redisConnection: {
@ -46,14 +46,14 @@ describe("Job Priority Tests", () => {
test("getJobPriority should return correct priority based on plan and set length", async () => {
const team_id = "team1";
const plan: PlanType = "standard";
const plan = "standard";
(redisConnection.scard as jest.Mock).mockResolvedValue(150);
const priority = await getJobPriority({ plan, team_id });
const priority = await getJobPriority({ team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(250);
const priorityExceeded = await getJobPriority({ plan, team_id });
const priorityExceeded = await getJobPriority({ team_id });
expect(priorityExceeded).toBe(20); // basePriority + Math.ceil((250 - 200) * 0.4)
});
@ -61,23 +61,23 @@ describe("Job Priority Tests", () => {
const team_id = "team1";
(redisConnection.scard as jest.Mock).mockResolvedValue(50);
let plan: PlanType = "hobby";
let priority = await getJobPriority({ plan, team_id });
let plan = "hobby";
let priority = await getJobPriority({ team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(150);
plan = "hobby";
priority = await getJobPriority({ plan, team_id });
priority = await getJobPriority({ team_id });
expect(priority).toBe(25); // basePriority + Math.ceil((150 - 50) * 0.3)
(redisConnection.scard as jest.Mock).mockResolvedValue(25);
plan = "free";
priority = await getJobPriority({ plan, team_id });
priority = await getJobPriority({ team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(60);
plan = "free";
priority = await getJobPriority({ plan, team_id });
priority = await getJobPriority({ team_id });
expect(priority).toBe(28); // basePriority + Math.ceil((60 - 25) * 0.5)
});

View File

@ -11,7 +11,6 @@ export type StoredCrawl = {
scrapeOptions: Omit<ScrapeOptions, "timeout">;
internalOptions: InternalOptions;
team_id: string;
plan?: string;
robots?: string;
cancelled?: boolean;
createdAt: number;
@ -24,7 +23,6 @@ export async function saveCrawl(id: string, crawl: StoredCrawl) {
method: "saveCrawl",
crawlId: id,
teamId: crawl.team_id,
plan: crawl.plan,
});
await redisConnection.set("crawl:" + id, JSON.stringify(crawl));
await redisConnection.expire("crawl:" + id, 24 * 60 * 60);
@ -274,7 +272,6 @@ export async function lockURL(
method: "lockURL",
preNormalizedURL: url,
teamId: sc.team_id,
plan: sc.plan,
});
if (typeof sc.crawlerOptions?.limit === "number") {
@ -335,7 +332,6 @@ export async function lockURLs(
module: "crawl-redis",
method: "lockURL",
teamId: sc.team_id,
plan: sc.plan,
});
// Add to visited_unique set

View File

@ -32,7 +32,6 @@ export type DeepResearchFinding = {
export type StoredDeepResearch = {
id: string;
team_id: string;
plan?: string;
createdAt: number;
status: "processing" | "completed" | "failed" | "cancelled";
error?: any;

View File

@ -1,6 +1,5 @@
import { logger as _logger } from "../logger";
import { updateDeepResearch } from "./deep-research-redis";
import { PlanType } from "../../types";
import { searchAndScrapeSearchResult } from "../../controllers/v1/search";
import { ResearchLLMService, ResearchStateManager } from "./research-manager";
import { logJob } from "../../services/logging/log_job";
@ -10,7 +9,6 @@ import { ExtractOptions } from "../../controllers/v1/types";
interface DeepResearchServiceOptions {
researchId: string;
teamId: string;
plan: string;
query: string;
maxDepth: number;
maxUrls: number;
@ -23,7 +21,7 @@ interface DeepResearchServiceOptions {
}
export async function performDeepResearch(options: DeepResearchServiceOptions) {
const { researchId, teamId, plan, timeLimit, subId, maxUrls } = options;
const { researchId, teamId, timeLimit, subId, maxUrls } = options;
const startTime = Date.now();
let currentTopic = options.query;
let urlsAnalyzed = 0;
@ -39,7 +37,6 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
const state = new ResearchStateManager(
researchId,
teamId,
plan,
options.maxDepth,
logger,
options.query,
@ -98,7 +95,6 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
const response = await searchAndScrapeSearchResult(searchQuery.query, {
teamId: options.teamId,
plan: options.plan as PlanType,
origin: "deep-research",
timeout: 10000,
scrapeOptions: {

View File

@ -29,7 +29,6 @@ export class ResearchStateManager {
constructor(
private readonly researchId: string,
private readonly teamId: string,
private readonly plan: string,
private readonly maxDepth: number,
private readonly logger: Logger,
private readonly topic: string,

View File

@ -1,5 +1,4 @@
import { Document, ScrapeOptions, URLTrace, scrapeOptions } from "../../controllers/v1/types";
import { PlanType } from "../../types";
import { logger } from "../logger";
import { getScrapeQueue } from "../../services/queue-service";
import { waitForJob } from "../../services/queue-jobs";
@ -10,7 +9,6 @@ import type { Logger } from "winston";
interface ScrapeDocumentOptions {
url: string;
teamId: string;
plan: PlanType;
origin: string;
timeout: number;
isSingleUrl?: boolean;
@ -31,7 +29,6 @@ export async function scrapeDocument(
async function attemptScrape(timeout: number) {
const jobId = crypto.randomUUID();
const jobPriority = await getJobPriority({
plan: options.plan,
team_id: options.teamId,
basePriority: 10,
});
@ -46,7 +43,6 @@ export async function scrapeDocument(
useCache: true,
teamId: options.teamId,
},
plan: options.plan,
origin: options.origin,
is_scrape: true,
},

View File

@ -24,7 +24,6 @@ export type ExtractedStep = {
export type StoredExtract = {
id: string;
team_id: string;
plan?: string;
createdAt: number;
status: "processing" | "completed" | "failed" | "cancelled";
error?: any;

View File

@ -4,7 +4,6 @@ import {
TokenUsage,
URLTrace,
} from "../../controllers/v1/types";
import { PlanType } from "../../types";
import { logger as _logger } from "../logger";
import { generateBasicCompletion, processUrl } from "./url-processor";
import { scrapeDocument } from "./document-scraper";
@ -44,7 +43,6 @@ import { buildRephraseToSerpPrompt } from "./build-prompts";
interface ExtractServiceOptions {
request: ExtractRequest;
teamId: string;
plan: PlanType;
subId?: string;
cacheMode?: "load" | "save" | "direct";
cacheKey?: string;
@ -76,7 +74,7 @@ export async function performExtraction(
extractId: string,
options: ExtractServiceOptions,
): Promise<ExtractResult> {
const { request, teamId, plan, subId } = options;
const { request, teamId, subId } = options;
const urlTraces: URLTrace[] = [];
let docsMap: Map<string, Document> = new Map();
let singleAnswerCompletions: completions | null = null;
@ -161,7 +159,6 @@ export async function performExtraction(
url,
prompt: request.prompt,
teamId,
plan,
allowExternalLinks: request.allowExternalLinks,
origin: request.origin,
limit: request.limit,
@ -311,7 +308,6 @@ export async function performExtraction(
{
url,
teamId,
plan,
origin: request.origin || "api",
timeout,
},
@ -574,7 +570,6 @@ export async function performExtraction(
{
url,
teamId,
plan,
origin: request.origin || "api",
timeout,
},

View File

@ -1,6 +1,5 @@
import { MapDocument, URLTrace } from "../../controllers/v1/types";
import { getMapResults } from "../../controllers/v1/map";
import { PlanType } from "../../types";
import { removeDuplicateUrls } from "../validateUrl";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { buildPreRerankPrompt, buildRefrasedPrompt } from "./build-prompts";
@ -23,7 +22,6 @@ interface ProcessUrlOptions {
prompt?: string;
schema?: any;
teamId: string;
plan: PlanType;
allowExternalLinks?: boolean;
origin?: string;
limit?: number;
@ -80,7 +78,6 @@ export async function processUrl(
url: baseUrl,
search: searchQuery,
teamId: options.teamId,
plan: options.plan,
allowExternalLinks: options.allowExternalLinks,
origin: options.origin,
limit: options.limit,
@ -117,7 +114,6 @@ export async function processUrl(
const retryMapResults = await getMapResults({
url: baseUrl,
teamId: options.teamId,
plan: options.plan,
allowExternalLinks: options.allowExternalLinks,
origin: options.origin,
limit: options.limit,

View File

@ -4,7 +4,6 @@ import { logger as _logger } from "../logger";
export interface GenerationData {
id: string;
team_id: string;
plan: string;
createdAt: number;
status: "processing" | "completed" | "failed";
url: string;

View File

@ -3,7 +3,6 @@ import { updateGeneratedLlmsTxt } from "./generate-llmstxt-redis";
import { getMapResults } from "../../controllers/v1/map";
import { z } from "zod";
import { scrapeDocument } from "../extract/document-scraper";
import { PlanType } from "../../types";
import {
getLlmsTextFromCache,
saveLlmsTextToCache,
@ -16,7 +15,6 @@ import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExt
interface GenerateLLMsTextServiceOptions {
generationId: string;
teamId: string;
plan: PlanType;
url: string;
maxUrls: number;
showFullText: boolean;
@ -64,7 +62,7 @@ function limitLlmsTxtEntries(llmstxt: string, maxEntries: number): string {
export async function performGenerateLlmsTxt(
options: GenerateLLMsTextServiceOptions,
) {
const { generationId, teamId, plan, url, maxUrls = 100, showFullText, subId } =
const { generationId, teamId, url, maxUrls = 100, showFullText, subId } =
options;
const startTime = Date.now();
const logger = _logger.child({
@ -113,7 +111,6 @@ export async function performGenerateLlmsTxt(
const mapResult = await getMapResults({
url,
teamId,
plan,
limit: effectiveMaxUrls,
includeSubdomains: false,
ignoreSitemap: false,
@ -142,7 +139,6 @@ export async function performGenerateLlmsTxt(
{
url,
teamId,
plan,
origin: url,
timeout: 30000,
isSingleUrl: true,

View File

@ -1,5 +1,5 @@
import { redisConnection } from "../../src/services/queue-service";
import { PlanType } from "../../src/types";
import { getACUC, getACUCTeam } from "../controllers/auth";
import { redisConnection } from "../services/queue-service";
import { logger } from "./logger";
const SET_KEY_PREFIX = "limit_team_id:";
@ -29,11 +29,9 @@ export async function deleteJobPriority(team_id, job_id) {
}
export async function getJobPriority({
plan,
team_id,
basePriority = 10,
}: {
plan: PlanType | undefined;
team_id: string;
basePriority?: number;
}): Promise<number> {
@ -42,52 +40,16 @@ export async function getJobPriority({
}
try {
const acuc = await getACUCTeam(team_id);
const setKey = SET_KEY_PREFIX + team_id;
// Get the length of the set
const setLength = await redisConnection.scard(setKey);
// Determine the priority based on the plan and set length
let planModifier = 1;
let bucketLimit = 0;
switch (plan) {
case "testSuite":
bucketLimit = 1000;
planModifier = 0.25;
break;
case "free":
bucketLimit = 25;
planModifier = 0.5;
break;
case "hobby":
bucketLimit = 100;
planModifier = 0.3;
break;
case "standard":
case "standardnew":
bucketLimit = 200;
planModifier = 0.2;
break;
case "growth":
case "growthdouble":
bucketLimit = 400;
planModifier = 0.1;
break;
case "etier2c":
bucketLimit = 1000;
planModifier = 0.05;
break;
case "etier1a":
bucketLimit = 1000;
planModifier = 0.05;
break;
default:
bucketLimit = 25;
planModifier = 1;
break;
}
let planModifier = acuc?.plan_priority.planModifier ?? 1;
let bucketLimit = acuc?.plan_priority.bucketLimit ?? 25;
// if length set is smaller than set, just return base priority
if (setLength <= bucketLimit) {
@ -100,7 +62,7 @@ export async function getJobPriority({
}
} catch (e) {
logger.error(
`Get job priority failed: ${team_id}, ${plan}, ${basePriority}`,
`Get job priority failed: ${team_id}, ${basePriority}`,
);
return basePriority;
}

View File

@ -105,9 +105,9 @@ export function authMiddleware(
}
}
const { team_id, plan, chunk } = auth;
const { team_id, chunk } = auth;
req.auth = { team_id, plan };
req.auth = { team_id };
req.acuc = chunk ?? undefined;
if (chunk) {
req.account = { remainingCredits: chunk.remaining_credits };

View File

@ -4,7 +4,7 @@ import { supabase_service } from "../supabase";
import * as Sentry from "@sentry/node";
import { Queue } from "bullmq";
import { withAuth } from "../../lib/withAuth";
import { getACUC, setCachedACUC } from "../../controllers/auth";
import { getACUC, setCachedACUC, setCachedACUCTeam } from "../../controllers/auth";
// Configuration constants
const BATCH_KEY = "billing_batch";
@ -298,7 +298,17 @@ async function supaBillTeam(
// Update cached ACUC to reflect the new credit usage
(async () => {
for (const apiKey of (data ?? []).map((x) => x.api_key)) {
await setCachedACUC(apiKey, (acuc) =>
await setCachedACUC(apiKey, is_extract, (acuc) =>
acuc
? {
...acuc,
credits_used: acuc.credits_used + credits,
adjusted_credits_used: acuc.adjusted_credits_used + credits,
remaining_credits: acuc.remaining_credits - credits,
}
: null,
);
await setCachedACUCTeam(team_id, is_extract, (acuc) =>
acuc
? {
...acuc,

View File

@ -1,6 +1,6 @@
import { getScrapeQueue } from "./queue-service";
import { v4 as uuidv4 } from "uuid";
import { NotificationType, PlanType, WebScraperOptions } from "../types";
import { NotificationType, WebScraperOptions } from "../types";
import * as Sentry from "@sentry/node";
import {
cleanOldConcurrencyLimitEntries,
@ -10,9 +10,9 @@ import {
pushConcurrencyLimitedJob,
} from "../lib/concurrency-limit";
import { logger } from "../lib/logger";
import { getConcurrencyLimitMax } from "./rate-limiter";
import { sendNotificationWithCustomDays } from './notification/email_notification';
import { shouldSendConcurrencyLimitNotification } from './notification/notification-check';
import { getACUC, getACUCTeam } from "../controllers/auth";
/**
* Checks if a job is a crawl or batch scrape based on its options
@ -51,8 +51,7 @@ export async function _addScrapeJobToBullMQ(
) {
if (
webScraperOptions &&
webScraperOptions.team_id &&
webScraperOptions.plan
webScraperOptions.team_id
) {
await pushConcurrencyLimitActiveJob(webScraperOptions.team_id, jobId, 60 * 1000); // 60s default timeout
}
@ -79,7 +78,7 @@ async function addScrapeJobRaw(
webScraperOptions.team_id
) {
const now = Date.now();
maxConcurrency = getConcurrencyLimitMax(webScraperOptions.plan ?? "free", webScraperOptions.team_id);
maxConcurrency = (await getACUCTeam(webScraperOptions.team_id))?.concurrency ?? 2;
cleanOldConcurrencyLimitEntries(webScraperOptions.team_id, now);
currentActiveConcurrency = (await getConcurrencyLimitActiveJobs(webScraperOptions.team_id, now)).length;
concurrencyLimited = currentActiveConcurrency >= maxConcurrency;
@ -170,9 +169,9 @@ export async function addScrapeJobs(
let currentActiveConcurrency = 0;
let maxConcurrency = 0;
if (jobs[0].data && jobs[0].data.team_id && jobs[0].data.plan) {
if (jobs[0].data && jobs[0].data.team_id) {
const now = Date.now();
maxConcurrency = getConcurrencyLimitMax(jobs[0].data.plan as PlanType, jobs[0].data.team_id);
maxConcurrency = (await getACUCTeam(jobs[0].data.team_id))?.concurrency ?? 2;
cleanOldConcurrencyLimitEntries(jobs[0].data.team_id, now);
currentActiveConcurrency = (await getConcurrencyLimitActiveJobs(jobs[0].data.team_id, now)).length;

View File

@ -48,11 +48,9 @@ import {
deleteJobPriority,
getJobPriority,
} from "../../src/lib/job-priority";
import { PlanType, RateLimiterMode } from "../types";
import { getJobs } from "..//controllers/v1/crawl-status";
import { configDotenv } from "dotenv";
import { scrapeOptions } from "../controllers/v1/types";
import { getRateLimiterPoints } from "./rate-limiter";
import {
cleanOldConcurrencyLimitEntries,
pushConcurrencyLimitActiveJob,
@ -144,7 +142,6 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
url,
mode: "single_urls" as const,
team_id: job.data.team_id,
plan: job.data.plan!,
crawlerOptions: {
...job.data.crawlerOptions,
urlInvisibleInCurrentCrawl: true,
@ -407,7 +404,6 @@ const processExtractJobInternal = async (
const result = await performExtraction(job.data.extractId, {
request: job.data.request,
teamId: job.data.teamId,
plan: job.data.plan,
subId: job.data.subId,
});
@ -489,7 +485,6 @@ const processDeepResearchJobInternal = async (
const result = await performDeepResearch({
researchId: job.data.researchId,
teamId: job.data.teamId,
plan: job.data.plan,
query: job.data.request.query,
maxDepth: job.data.request.maxDepth,
timeLimit: job.data.request.timeLimit,
@ -564,7 +559,6 @@ const processGenerateLlmsTxtJobInternal = async (
const result = await performGenerateLlmsTxt({
generationId: job.data.generationId,
teamId: job.data.teamId,
plan: job.data.plan,
url: job.data.request.url,
maxUrls: job.data.request.maxUrls,
showFullText: job.data.request.showFullText,
@ -682,7 +676,7 @@ const workerFun = async (
runningJobs.delete(job.id);
}
if (job.id && job.data && job.data.team_id && job.data.plan) {
if (job.id && job.data && job.data.team_id) {
await removeConcurrencyLimitActiveJob(job.data.team_id, job.id);
cleanOldConcurrencyLimitEntries(job.data.team_id);
@ -805,7 +799,6 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
crawlerOptions: job.data.crawlerOptions,
scrapeOptions: scrapeOptions.parse(job.data.scrapeOptions),
internalOptions: sc.internalOptions,
plan: job.data.plan!,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
webhook: job.data.webhook,
@ -844,7 +837,6 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
});
let jobPriority = await getJobPriority({
plan: job.data.plan,
team_id: job.data.team_id,
basePriority: 21,
});
@ -858,7 +850,6 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
url,
mode: "single_urls" as const,
team_id: job.data.team_id,
plan: job.data.plan!,
crawlerOptions: job.data.crawlerOptions,
scrapeOptions: job.data.scrapeOptions,
internalOptions: sc.internalOptions,
@ -1155,7 +1146,6 @@ async function processJob(job: Job & { id: string }, token: string) {
if (await lockURL(job.data.crawl_id, sc, link)) {
// This seems to work really welel
const jobPriority = await getJobPriority({
plan: sc.plan as PlanType,
team_id: sc.team_id,
basePriority: job.data.crawl_id ? 20 : 10,
});
@ -1169,7 +1159,6 @@ async function processJob(job: Job & { id: string }, token: string) {
{ jobPriority, url: link },
);
// console.log("plan: ", sc.plan);
// console.log("team_id: ", sc.team_id)
// console.log("base priority: ", job.data.crawl_id ? 20 : 10)
// console.log("job priority: " , jobPriority, "\n\n\n")
@ -1185,7 +1174,6 @@ async function processJob(job: Job & { id: string }, token: string) {
...sc.crawlerOptions,
currentDiscoveryDepth: (job.data.crawlerOptions?.currentDiscoveryDepth ?? 0) + 1,
},
plan: job.data.plan,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
webhook: job.data.webhook,

View File

@ -1,370 +1,370 @@
import {
getRateLimiter,
serverRateLimiter,
testSuiteRateLimiter,
redisRateLimitClient,
} from "./rate-limiter";
import { RateLimiterMode } from "../../src/types";
import { RateLimiterRedis } from "rate-limiter-flexible";
// import {
// getRateLimiter,
// serverRateLimiter,
// redisRateLimitClient,
// } from "./rate-limiter";
// import { RateLimiterMode } from "../../src/types";
// import { RateLimiterRedis } from "rate-limiter-flexible";
describe("Rate Limiter Service", () => {
beforeAll(async () => {
try {
await redisRateLimitClient.connect();
// if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
// console.log("Erasing all keys");
// // erase all the keys that start with "test-prefix"
// const keys = await redisRateLimitClient.keys("test-prefix:*");
// if (keys.length > 0) {
// await redisRateLimitClient.del(...keys);
// }
// }
} catch (error) {}
});
// describe("Rate Limiter Service", () => {
// beforeAll(async () => {
// try {
// await redisRateLimitClient.connect();
// // if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
// // console.log("Erasing all keys");
// // // erase all the keys that start with "test-prefix"
// // const keys = await redisRateLimitClient.keys("test-prefix:*");
// // if (keys.length > 0) {
// // await redisRateLimitClient.del(...keys);
// // }
// // }
// } catch (error) {}
// });
afterAll(async () => {
try {
// if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
await redisRateLimitClient.disconnect();
// }
} catch (error) {}
});
// afterAll(async () => {
// try {
// // if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
// await redisRateLimitClient.disconnect();
// // }
// } catch (error) {}
// });
it("should return the testSuiteRateLimiter for specific tokens", () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:a01ccae",
);
expect(limiter).toBe(testSuiteRateLimiter);
// it("should return the testSuiteRateLimiter for specific tokens", () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:a01ccae",
// );
// expect(limiter).toBe(testSuiteRateLimiter);
const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:6254cf9",
);
expect(limiter2).toBe(testSuiteRateLimiter);
});
// const limiter2 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:6254cf9",
// );
// expect(limiter2).toBe(testSuiteRateLimiter);
// });
it("should return the serverRateLimiter if mode is not found", () => {
const limiter = getRateLimiter(
"nonexistent" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter.points).toBe(serverRateLimiter.points);
});
// it("should return the serverRateLimiter if mode is not found", () => {
// const limiter = getRateLimiter(
// "nonexistent" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter.points).toBe(serverRateLimiter.points);
// });
it("should return the correct rate limiter based on mode and plan", () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(2);
// it("should return the correct rate limiter based on mode and plan", () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(2);
const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"standard",
);
expect(limiter2.points).toBe(100);
// const limiter2 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// "standard",
// );
// expect(limiter2.points).toBe(100);
const limiter3 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"growth",
);
expect(limiter3.points).toBe(500);
// const limiter3 = getRateLimiter(
// "search" as RateLimiterMode,
// "test-prefix:someToken",
// "growth",
// );
// expect(limiter3.points).toBe(500);
const limiter4 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"growth",
);
expect(limiter4.points).toBe(250);
});
// const limiter4 = getRateLimiter(
// "crawlStatus" as RateLimiterMode,
// "test-prefix:someToken",
// "growth",
// );
// expect(limiter4.points).toBe(250);
// });
it("should return the default rate limiter if plan is not provided", () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter.points).toBe(3);
// it("should return the default rate limiter if plan is not provided", () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter.points).toBe(3);
const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(20);
});
// const limiter2 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(20);
// });
it("should create a new RateLimiterRedis instance with correct parameters", () => {
const keyPrefix = "test-prefix";
const points = 10;
const limiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix,
points,
duration: 60,
});
// it("should create a new RateLimiterRedis instance with correct parameters", () => {
// const keyPrefix = "test-prefix";
// const points = 10;
// const limiter = new RateLimiterRedis({
// storeClient: redisRateLimitClient,
// keyPrefix,
// points,
// duration: 60,
// });
expect(limiter.keyPrefix).toBe(keyPrefix);
expect(limiter.points).toBe(points);
expect(limiter.duration).toBe(60);
});
// expect(limiter.keyPrefix).toBe(keyPrefix);
// expect(limiter.points).toBe(points);
// expect(limiter.duration).toBe(60);
// });
it("should return the correct rate limiter for 'preview' mode", () => {
const limiter = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(5);
// it("should return the correct rate limiter for 'preview' mode", () => {
// const limiter = getRateLimiter(
// "preview" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(5);
});
// const limiter2 = getRateLimiter(
// "preview" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(5);
// });
it("should return the correct rate limiter for 'account' mode", () => {
const limiter = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(100);
// it("should return the correct rate limiter for 'account' mode", () => {
// const limiter = getRateLimiter(
// "account" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(100);
const limiter2 = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(100);
});
// const limiter2 = getRateLimiter(
// "account" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(100);
// });
it("should return the correct rate limiter for 'crawlStatus' mode", () => {
const limiter = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(150);
// it("should return the correct rate limiter for 'crawlStatus' mode", () => {
// const limiter = getRateLimiter(
// "crawlStatus" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(150);
const limiter2 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(250);
});
// const limiter2 = getRateLimiter(
// "crawlStatus" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(250);
// });
it("should consume points correctly for 'crawl' mode", async () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someTokenCRAWL",
"free",
);
const consumePoints = 1;
// it("should consume points correctly for 'crawl' mode", async () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someTokenCRAWL",
// "free",
// );
// const consumePoints = 1;
const res = await limiter.consume(
"test-prefix:someTokenCRAWL",
consumePoints,
);
expect(res.remainingPoints).toBe(1);
});
// const res = await limiter.consume(
// "test-prefix:someTokenCRAWL",
// consumePoints,
// );
// expect(res.remainingPoints).toBe(1);
// });
it("should consume points correctly for 'scrape' mode (DEFAULT)", async () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someTokenX",
);
const consumePoints = 4;
// it("should consume points correctly for 'scrape' mode (DEFAULT)", async () => {
// const limiter = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someTokenX",
// );
// const consumePoints = 4;
const res = await limiter.consume("test-prefix:someTokenX", consumePoints);
expect(res.remainingPoints).toBe(16);
});
// const res = await limiter.consume("test-prefix:someTokenX", consumePoints);
// expect(res.remainingPoints).toBe(16);
// });
it("should consume points correctly for 'scrape' mode (HOBBY)", async () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someTokenXY",
"hobby",
);
expect(limiter.points).toBe(20);
// it("should consume points correctly for 'scrape' mode (HOBBY)", async () => {
// const limiter = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someTokenXY",
// "hobby",
// );
// expect(limiter.points).toBe(20);
const consumePoints = 5;
// const consumePoints = 5;
const res = await limiter.consume("test-prefix:someTokenXY", consumePoints);
expect(res.consumedPoints).toBe(5);
expect(res.remainingPoints).toBe(15);
});
// const res = await limiter.consume("test-prefix:someTokenXY", consumePoints);
// expect(res.consumedPoints).toBe(5);
// expect(res.remainingPoints).toBe(15);
// });
it("should return the correct rate limiter for 'crawl' mode", () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(2);
// it("should return the correct rate limiter for 'crawl' mode", () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(2);
const limiter2 = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"starter",
);
expect(limiter2.points).toBe(10);
// const limiter2 = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// "starter",
// );
// expect(limiter2.points).toBe(10);
const limiter3 = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"standard",
);
expect(limiter3.points).toBe(5);
});
// const limiter3 = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// "standard",
// );
// expect(limiter3.points).toBe(5);
// });
it("should return the correct rate limiter for 'scrape' mode", () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(10);
// it("should return the correct rate limiter for 'scrape' mode", () => {
// const limiter = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(10);
const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"starter",
);
expect(limiter2.points).toBe(100);
// const limiter2 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// "starter",
// );
// expect(limiter2.points).toBe(100);
const limiter3 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"standard",
);
expect(limiter3.points).toBe(100);
// const limiter3 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// "standard",
// );
// expect(limiter3.points).toBe(100);
const limiter4 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"growth",
);
expect(limiter4.points).toBe(1000);
});
// const limiter4 = getRateLimiter(
// "scrape" as RateLimiterMode,
// "test-prefix:someToken",
// "growth",
// );
// expect(limiter4.points).toBe(1000);
// });
it("should return the correct rate limiter for 'search' mode", () => {
const limiter = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(5);
// it("should return the correct rate limiter for 'search' mode", () => {
// const limiter = getRateLimiter(
// "search" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"starter",
);
expect(limiter2.points).toBe(50);
// const limiter2 = getRateLimiter(
// "search" as RateLimiterMode,
// "test-prefix:someToken",
// "starter",
// );
// expect(limiter2.points).toBe(50);
const limiter3 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"standard",
);
expect(limiter3.points).toBe(50);
});
// const limiter3 = getRateLimiter(
// "search" as RateLimiterMode,
// "test-prefix:someToken",
// "standard",
// );
// expect(limiter3.points).toBe(50);
// });
it("should return the correct rate limiter for 'preview' mode", () => {
const limiter = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(5);
// it("should return the correct rate limiter for 'preview' mode", () => {
// const limiter = getRateLimiter(
// "preview" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(5);
});
// const limiter2 = getRateLimiter(
// "preview" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(5);
// });
it("should return the correct rate limiter for 'account' mode", () => {
const limiter = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(100);
// it("should return the correct rate limiter for 'account' mode", () => {
// const limiter = getRateLimiter(
// "account" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(100);
const limiter2 = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(100);
});
// const limiter2 = getRateLimiter(
// "account" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(100);
// });
it("should return the correct rate limiter for 'crawlStatus' mode", () => {
const limiter = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(150);
// it("should return the correct rate limiter for 'crawlStatus' mode", () => {
// const limiter = getRateLimiter(
// "crawlStatus" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(150);
const limiter2 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(250);
});
// const limiter2 = getRateLimiter(
// "crawlStatus" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(250);
// });
it("should return the correct rate limiter for 'testSuite' mode", () => {
const limiter = getRateLimiter(
"testSuite" as RateLimiterMode,
"test-prefix:someToken",
"free",
);
expect(limiter.points).toBe(10000);
// it("should return the correct rate limiter for 'testSuite' mode", () => {
// const limiter = getRateLimiter(
// "testSuite" as RateLimiterMode,
// "test-prefix:someToken",
// "free",
// );
// expect(limiter.points).toBe(10000);
const limiter2 = getRateLimiter(
"testSuite" as RateLimiterMode,
"test-prefix:someToken",
);
expect(limiter2.points).toBe(10000);
});
// const limiter2 = getRateLimiter(
// "testSuite" as RateLimiterMode,
// "test-prefix:someToken",
// );
// expect(limiter2.points).toBe(10000);
// });
it("should throw an error when consuming more points than available", async () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
);
const consumePoints = limiter.points + 1;
// it("should throw an error when consuming more points than available", async () => {
// const limiter = getRateLimiter(
// "crawl" as RateLimiterMode,
// "test-prefix:someToken",
// );
// const consumePoints = limiter.points + 1;
try {
await limiter.consume("test-prefix:someToken", consumePoints);
} catch (error) {
// expect remaining points to be 0
const res = await limiter.get("test-prefix:someToken");
expect(res?.remainingPoints).toBe(0);
}
});
// try {
// await limiter.consume("test-prefix:someToken", consumePoints);
// } catch (error) {
// // expect remaining points to be 0
// const res = await limiter.get("test-prefix:someToken");
// expect(res?.remainingPoints).toBe(0);
// }
// });
it("should reset points after duration", async () => {
const keyPrefix = "test-prefix";
const points = 10;
const duration = 1; // 1 second
const limiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix,
points,
duration,
});
// it("should reset points after duration", async () => {
// const keyPrefix = "test-prefix";
// const points = 10;
// const duration = 1; // 1 second
// const limiter = new RateLimiterRedis({
// storeClient: redisRateLimitClient,
// keyPrefix,
// points,
// duration,
// });
const consumePoints = 5;
await limiter.consume("test-prefix:someToken", consumePoints);
await new Promise((resolve) => setTimeout(resolve, duration * 1000 + 100)); // Wait for duration + 100ms
// const consumePoints = 5;
// await limiter.consume("test-prefix:someToken", consumePoints);
// await new Promise((resolve) => setTimeout(resolve, duration * 1000 + 100)); // Wait for duration + 100ms
const res = await limiter.consume("test-prefix:someToken", consumePoints);
expect(res.remainingPoints).toBe(points - consumePoints);
});
});
// const res = await limiter.consume("test-prefix:someToken", consumePoints);
// expect(res.remainingPoints).toBe(points - consumePoints);
// });
// });
// TODO: FIX

View File

@ -1,162 +1,7 @@
import { RateLimiterRedis } from "rate-limiter-flexible";
import { PlanType, RateLimiterMode } from "../../src/types";
import { RateLimiterMode } from "../types";
import Redis from "ioredis";
export const CONCURRENCY_LIMIT: Omit<Record<PlanType, number>, ""> = {
free: 2,
hobby: 5,
starter: 50,
standard: 50,
standardNew: 50,
standardnew: 50,
scale: 100,
growth: 100,
growthdouble: 100,
etier2c: 300,
etier1a: 200,
etier2a: 300,
etierscale1: 150,
etierscale2: 200,
testSuite: 200,
devB: 120,
etier2d: 250,
manual: 200,
extract_starter: 20,
extract_explorer: 100,
extract_pro: 200
};
const RATE_LIMITS = {
crawl: {
default: 15,
free: 2,
starter: 50,
standard: 25,
standardOld: 200,
scale: 250,
hobby: 15,
standardNew: 50,
standardnew: 50,
growth: 250,
growthdouble: 250,
etier2c: 1500,
etier1a: 5000,
etier2a: 1500,
etierscale1: 750,
etierscale2: 1500,
// extract ops
extract_starter: 100,
extract_explorer: 500,
extract_pro: 1000,
},
scrape: {
default: 100,
free: 10,
starter: 500,
standard: 500,
standardOld: 500,
scale: 2500,
hobby: 100,
standardNew: 500,
standardnew: 500,
growth: 5000,
growthdouble: 5000,
etier2c: 12500,
etier1a: 5000,
etier2a: 12500,
etierscale1: 7500,
etierscale2: 12500,
// extract ops
extract_starter: 100,
extract_explorer: 500,
extract_pro: 1000,
},
search: {
default: 100,
free: 5,
starter: 250,
standard: 250,
standardOld: 200,
scale: 2500,
hobby: 50,
standardNew: 250,
standardnew: 250,
growth: 2500,
growthdouble: 2500,
etier2c: 12500,
etier1a: 5000,
etier2a: 12500,
etierscale1: 7500,
etierscale2: 12500,
// extract ops
extract_starter: 100,
extract_explorer: 500,
extract_pro: 1000,
},
map: {
default: 100,
free: 5,
starter: 250,
standard: 250,
standardOld: 250,
scale: 2500,
hobby: 50,
standardNew: 250,
standardnew: 250,
growth: 5000,
growthdouble: 5000,
etier2c: 12500,
etier1a: 5000,
etier2a: 12500,
etierscale1: 7500,
etierscale2: 12500,
// extract ops
extract_starter: 100,
extract_explorer: 500,
extract_pro: 1000,
},
extract: {
default: 100,
free: 10,
starter: 500,
standard: 500,
standardOld: 500,
scale: 1000,
hobby: 100,
standardNew: 500,
standardnew: 500,
growth: 1000,
growthdouble: 1000,
etier2c: 1000,
etier1a: 1000,
etier2a: 1000,
etierscale1: 1000,
etierscale2: 1000,
extract_starter: 100,
extract_explorer: 500,
extract_pro: 1000,
},
preview: {
free: 5,
default: 25,
},
account: {
free: 100,
default: 500,
},
crawlStatus: {
free: 500,
default: 25000,
},
extractStatus: {
free: 500,
default: 25000,
},
testSuite: {
free: 10000,
default: 50000,
},
};
import type { AuthCreditUsageChunk } from "../controllers/v1/types";
export const redisRateLimitClient = new Redis(
process.env.REDIS_RATE_LIMIT_URL!,
@ -170,11 +15,6 @@ const createRateLimiter = (keyPrefix, points) =>
duration: 60, // Duration in seconds
});
export const serverRateLimiter = createRateLimiter(
"server",
RATE_LIMITS.account.default,
);
export const testSuiteRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "test-suite",
@ -182,41 +22,7 @@ export const testSuiteRateLimiter = new RateLimiterRedis({
duration: 60, // Duration in seconds
});
export const devBRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "dev-b",
points: 1200,
duration: 60, // Duration in seconds
});
export const manualRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "manual",
points: 10000,
duration: 60, // Duration in seconds
});
export const scrapeStatusRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "scrape-status",
points: 400,
duration: 60, // Duration in seconds
});
export const etier1aRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "etier1a",
points: 10000,
duration: 60, // Duration in seconds
});
export const etier2aRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "etier2a",
points: 2500,
duration: 60, // Duration in seconds
});
// TODO: PUT OVERRIDES FOR THESE INTO THE DB - mogery
const testSuiteTokens = [
"a01ccae",
"6254cf9",
@ -240,105 +46,32 @@ const testSuiteTokens = [
"0a18c9e", // gh
];
const manual_growth = ["22a07b64-cbfe-4924-9273-e3f01709cdf2"];
const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6", "9661a311-3d75-45d2-bb70-71004d995873"];
const manual_etier2c = ["77545e01-9cec-4fa9-8356-883fc66ac13e", "778c62c4-306f-4039-b372-eb20174760c0"];
// TODO: PUT OVERRIDES FOR THESE INTO THE DB - mogery
// const manual_growth = ["22a07b64-cbfe-4924-9273-e3f01709cdf2"];
// const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6", "9661a311-3d75-45d2-bb70-71004d995873"];
// const manual_etier2c = ["77545e01-9cec-4fa9-8356-883fc66ac13e", "778c62c4-306f-4039-b372-eb20174760c0"];
function makePlanKey(plan?: string) {
return plan ? plan.replace("-", "") : "default"; // "default"
}
export function getRateLimiterPoints(
mode: RateLimiterMode,
token?: string,
plan?: string,
teamId?: string,
): number {
const rateLimitConfig = RATE_LIMITS[mode]; // {default : 5}
if (!rateLimitConfig) return RATE_LIMITS.account.default;
const points: number =
rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default; // 5
return points;
}
const fallbackRateLimits: AuthCreditUsageChunk["rate_limits"] = {
crawl: 15,
scrape: 100,
search: 100,
map: 100,
extract: 100,
preview: 25,
extractStatus: 25000,
crawlStatus: 25000,
};
export function getRateLimiter(
mode: RateLimiterMode,
token?: string,
plan?: string,
teamId?: string,
rate_limits: AuthCreditUsageChunk["rate_limits"] | null,
): RateLimiterRedis {
if (token && testSuiteTokens.some((testToken) => token.includes(testToken))) {
return testSuiteRateLimiter;
}
if (teamId && teamId === process.env.DEV_B_TEAM_ID) {
return devBRateLimiter;
}
if (teamId && (teamId === process.env.ETIER1A_TEAM_ID || teamId === process.env.ETIER1A_TEAM_ID_O)) {
return etier1aRateLimiter;
}
if (teamId && (teamId === process.env.ETIER2A_TEAM_ID || teamId === process.env.ETIER2A_TEAM_ID_B)) {
return etier2aRateLimiter;
}
if (teamId && teamId === process.env.ETIER2D_TEAM_ID) {
return etier2aRateLimiter;
}
if (teamId && (manual.includes(teamId) || manual_etier2c.includes(teamId))) {
return manualRateLimiter;
}
return createRateLimiter(
`${mode}-${makePlanKey(plan)}`,
getRateLimiterPoints(mode, token, plan, teamId),
`${mode}`,
(rate_limits ?? fallbackRateLimits)[mode] ?? 500,
);
}
export function getConcurrencyLimitMax(
plan: PlanType,
teamId?: string,
): number {
// Moved this to auth check, plan will come as testSuite if token is present
// if (token && testSuiteTokens.some((testToken) => token.includes(testToken))) {
// return CONCURRENCY_LIMIT.testSuite;
// }
if (teamId && teamId === process.env.DEV_B_TEAM_ID) {
return CONCURRENCY_LIMIT.devB;
}
if (teamId && (teamId === process.env.ETIER1A_TEAM_ID || teamId === process.env.ETIER1A_TEAM_ID_O)) {
return CONCURRENCY_LIMIT.etier1a;
}
if (teamId && (teamId === process.env.ETIER2A_TEAM_ID || teamId === process.env.ETIER2A_TEAM_ID_B)) {
return CONCURRENCY_LIMIT.etier2a;
}
if (teamId && teamId === process.env.ETIER2D_TEAM_ID) {
return CONCURRENCY_LIMIT.etier2a;
}
if (teamId && manual.includes(teamId)) {
return CONCURRENCY_LIMIT.manual;
}
if (teamId && manual_etier2c.includes(teamId)) {
return CONCURRENCY_LIMIT.etier2c;
}
if (teamId && manual_growth.includes(teamId)) {
return CONCURRENCY_LIMIT.growth;
}
return CONCURRENCY_LIMIT[plan] ?? 10;
}
export function isTestSuiteToken(token: string): boolean {
return testSuiteTokens.some((testToken) => token.includes(testToken));
}

View File

@ -37,7 +37,6 @@ export interface WebScraperOptions {
scrapeOptions: ScrapeOptions;
internalOptions?: InternalOptions;
team_id: string;
plan: string;
origin?: string;
crawl_id?: string;
sitemapped?: boolean;
@ -144,7 +143,6 @@ export type AuthResponse =
success: true;
team_id: string;
api_key?: string;
plan?: PlanType;
chunk: AuthCreditUsageChunk | null;
}
| {
@ -178,30 +176,6 @@ export type ScrapeLog = {
ipv6_support?: boolean | null;
};
export type PlanType =
| "starter"
| "standard"
| "scale"
| "hobby"
| "standardnew"
| "standardNew"
| "growth"
| "growthdouble"
| "etier2c"
| "etier1a"
| "etierscale1"
| "etierscale2"
| "etier2a"
| "free"
| "testSuite"
| "devB"
| "etier2d"
| "manual"
| "extract_starter"
| "extract_explorer"
| "extract_pro"
| "";
export type WebhookEventType =
| "crawl.page"
| "batch_scrape.page"