From 04916f17e2b8a8eaf1a8f0c64ff9d25867cc8fa6 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 21 Jan 2025 19:17:06 -0300 Subject: [PATCH] Nick: bug fixes + acuc fixes + cache fixes --- apps/api/src/controllers/auth.ts | 31 ++++++++++++++++--- apps/api/src/index.ts | 2 +- apps/api/src/lib/cache.ts | 5 +++ .../scraper/scrapeURL/engines/cache/index.ts | 8 +++++ apps/api/src/services/rate-limiter.ts | 19 ++++++++++++ apps/api/src/types.ts | 3 ++ 6 files changed, 62 insertions(+), 6 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 16622bb0..22c75232 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -105,6 +105,7 @@ export async function getACUC( { get: true }, )); + if (!error) { break; } @@ -139,8 +140,17 @@ export async function getACUC( } export async function clearACUC(api_key: string): Promise { - const cacheKeyACUC = `acuc_${api_key}`; - await deleteKey(cacheKeyACUC); + // Delete cache for all rate limiter modes + const modes = Object.values(RateLimiterMode); + await Promise.all( + modes.map(async (mode) => { + const cacheKey = `acuc_${api_key}_${mode}`; + await deleteKey(cacheKey); + }) + ); + + // Also clear the base cache key + await deleteKey(`acuc_${api_key}`); } export async function authenticateUser( @@ -188,7 +198,7 @@ export async function supaAuthenticateUser( let teamId: string | null = null; let priceId: string | null = null; let chunk: AuthCreditUsageChunk | null = null; - + let plan: PlanType = "free"; if (token == "this_is_just_a_preview_token") { if (mode == RateLimiterMode.CrawlStatus) { rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); @@ -198,6 +208,7 @@ export async function supaAuthenticateUser( rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); } teamId = "preview"; + plan = "free"; } else { normalizedApi = parseApi(token); if (!normalizedApiIsUuid(normalizedApi)) { @@ -221,7 +232,8 @@ export async function supaAuthenticateUser( teamId = chunk.team_id; priceId = chunk.price_id; - const plan = getPlanByPriceId(priceId); + + plan = getPlanByPriceId(priceId); subscriptionData = { team_id: teamId, plan, @@ -322,7 +334,7 @@ export async function supaAuthenticateUser( mode === RateLimiterMode.Extract || mode === RateLimiterMode.Search) ) { - return { success: true, team_id: "preview", chunk: null }; + return { success: true, team_id: "preview", chunk: null, plan: "free" }; // check the origin of the request and make sure its from firecrawl.dev // const origin = req.headers.origin; // if (origin && origin.includes("firecrawl.dev")){ @@ -369,6 +381,15 @@ function getPlanByPriceId(price_id: string | null): PlanType { case process.env.STRIPE_PRICE_ID_ETIER_SCALE_1_MONTHLY: case process.env.STRIPE_PRICE_ID_ETIER_SCALE_1_YEARLY: return "etierscale1"; + case process.env.STRIPE_PRICE_ID_EXTRACT_STARTER_MONTHLY: + case process.env.STRIPE_PRICE_ID_EXTRACT_STARTER_YEARLY: + return "extract_starter"; + case process.env.STRIPE_PRICE_ID_EXTRACT_EXPLORER_MONTHLY: + case process.env.STRIPE_PRICE_ID_EXTRACT_EXPLORER_YEARLY: + return "extract_explorer"; + case process.env.STRIPE_PRICE_ID_EXTRACT_PRO_MONTHLY: + case process.env.STRIPE_PRICE_ID_EXTRACT_PRO_YEARLY: + return "extract_pro"; default: return "free"; } diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index c1dc4c04..b4eff93f 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -17,6 +17,7 @@ import expressWs from "express-ws"; import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types"; import { ZodError } from "zod"; import { v4 as uuidv4 } from "uuid"; +import { RateLimiterMode } from "./types"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); @@ -249,7 +250,6 @@ app.use( ); logger.info(`Worker ${process.pid} started`); - // const sq = getScrapeQueue(); // sq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting")); diff --git a/apps/api/src/lib/cache.ts b/apps/api/src/lib/cache.ts index 85e57e51..611bc043 100644 --- a/apps/api/src/lib/cache.ts +++ b/apps/api/src/lib/cache.ts @@ -41,6 +41,11 @@ export type CacheEntry = { export async function saveEntryToCache(key: string, entry: CacheEntry) { if (!cacheRedis) return; + if (!entry.html || entry.html.length < 100) { + logger.warn("Skipping cache save for short HTML", { key, htmlLength: entry.html?.length }); + return; + } + try { await cacheRedis.set(key, JSON.stringify(entry), "EX", 3600); // 1 hour in seconds } catch (error) { diff --git a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts index c0451df4..31075f92 100644 --- a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts @@ -10,6 +10,14 @@ export async function scrapeCache(meta: Meta): Promise { const entry = await getEntryFromCache(key); if (entry === null) throw new EngineError("Cache missed"); + if (!entry.html) { + throw new EngineError("Cache hit but HTML is missing"); + } + if (entry.html.length < 100) { + throw new EngineError("Cache hit but HTML is too short to be useful"); + } + + // Set fromCache flag to indicate this document was retrieved from cache meta.internalOptions.fromCache = true; diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 09c8d749..68cea8fa 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -19,6 +19,10 @@ const RATE_LIMITS = { etier1a: 1000, etier2a: 300, etierscale1: 150, + // extract ops + extract_starter: 20, + extract_explorer: 100, + extract_pro: 1000, }, scrape: { default: 20, @@ -36,6 +40,10 @@ const RATE_LIMITS = { etier1a: 1000, etier2a: 2500, etierscale1: 1500, + // extract ops + extract_starter: 20, + extract_explorer: 100, + extract_pro: 1000, }, search: { default: 20, @@ -53,6 +61,10 @@ const RATE_LIMITS = { etier1a: 1000, etier2a: 2500, etierscale1: 1500, + // extract ops + extract_starter: 20, + extract_explorer: 100, + extract_pro: 1000, }, map: { default: 20, @@ -70,6 +82,10 @@ const RATE_LIMITS = { etier1a: 1000, etier2a: 2500, etierscale1: 1500, + // extract ops + extract_starter: 20, + extract_explorer: 100, + extract_pro: 1000, }, extract: { default: 20, @@ -87,6 +103,9 @@ const RATE_LIMITS = { etier1a: 1000, etier2a: 1000, etierscale1: 1000, + extract_starter: 20, + extract_explorer: 100, + extract_pro: 1000, }, preview: { free: 5, diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index ce535ede..a894c329 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -186,6 +186,9 @@ export type PlanType = | "etier1a" | "etierscale1" | "free" + | "extract_starter" + | "extract_explorer" + | "extract_pro" | ""; export type WebhookEventType =