From 93655b5c0ba96ce39f4414de0594a170672e34ea Mon Sep 17 00:00:00 2001
From: Gergő Móricz
Date: Thu, 29 May 2025 16:01:08 +0200
Subject: [PATCH] feat(scrapeURL/pdf): bill n credits per page (FIR-1934)
 (#1553)

* feat(scrapeURL/pdf): bill n credits per page

* Update scrape.ts

* Update queue-worker.ts

* separate billing logic

---------

Co-authored-by: Nicolas
---
 apps/api/src/controllers/v1/scrape.ts | 24 ++-----------
 apps/api/src/lib/scrape-billing.ts    | 49 +++++++++++++++++++++++++++
 apps/api/src/services/queue-worker.ts | 18 ++--------
 3 files changed, 53 insertions(+), 38 deletions(-)
 create mode 100644 apps/api/src/lib/scrape-billing.ts

diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index 092d86e8..a75a58d1 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -13,6 +13,7 @@ import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
 import { getJobPriority } from "../../lib/job-priority";
 import { getScrapeQueue } from "../../services/queue-service";
 import { supabaseGetJobById } from "../../lib/supabase-jobs";
+import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";
 
 export async function scrapeController(
   req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
@@ -132,33 +133,12 @@ export async function scrapeController(
           0 // TODO: fix
         : 0;
 
-  let creditsToBeBilled = 1; // Assuming 1 credit per document
   if (earlyReturn) {
     // Don't bill if we're early returning
     return;
   }
 
-  if ((req.body.extract && req.body.formats?.includes("extract")) || (req.body.formats?.includes("changeTracking") && req.body.changeTrackingOptions?.modes?.includes("json"))) {
-    creditsToBeBilled = 5;
-  }
-  if (req.body.agent?.model?.toLowerCase() === "fire-1" || req.body.extract?.agent?.model?.toLowerCase() === "fire-1" || req.body.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
-    if (process.env.USE_DB_AUTHENTICATION === "true") {
-      // @Nick this is a hack pushed at 2AM pls help - mogery
-      const job = await supabaseGetJobById(jobId);
-      if (!job?.cost_tracking) {
-        logger.warn("No cost tracking found for job", {
-          jobId,
-        });
-      }
-      creditsToBeBilled = Math.ceil((job?.cost_tracking?.totalCost ?? 1) * 1800);
-    } else {
-      creditsToBeBilled = 150;
-    }
-  }
-
-  if (doc?.metadata?.proxyUsed === "stealth") {
-    creditsToBeBilled += 4;
-  }
+  let creditsToBeBilled = await calculateCreditsToBeBilled(req.body, doc, jobId);
 
   billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
     (error) => {
diff --git a/apps/api/src/lib/scrape-billing.ts b/apps/api/src/lib/scrape-billing.ts
new file mode 100644
index 00000000..aabe42b0
--- /dev/null
+++ b/apps/api/src/lib/scrape-billing.ts
@@ -0,0 +1,49 @@
+import { Document, ScrapeOptions } from "../controllers/v1/types";
+import { supabaseGetJobById } from "./supabase-jobs";
+import { logger } from "./logger";
+import { CostTracking } from "./extract/extraction-service";
+
+const creditsPerPDFPage = 1;
+const stealthProxyCostBonus = 4;
+
+export async function calculateCreditsToBeBilled(options: ScrapeOptions, document: Document, jobId: string, costTracking?: any) {
+  let creditsToBeBilled = 1; // Assuming 1 credit per document
+  if ((options.extract && options.formats?.includes("extract")) || (options.formats?.includes("changeTracking") && options.changeTrackingOptions?.modes?.includes("json"))) {
+    creditsToBeBilled = 5;
+  }
+
+  if (options.agent?.model?.toLowerCase() === "fire-1" || options.extract?.agent?.model?.toLowerCase() === "fire-1" || options.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
+    if (process.env.USE_DB_AUTHENTICATION === "true") {
+      // @Nick this is a hack pushed at 2AM pls help - mogery
+      if (!costTracking) {
+        const job = await supabaseGetJobById(jobId);
+        costTracking = job?.cost_tracking;
+      }
+
+      if (!costTracking) {
+        logger.warn("No cost tracking found for job", {
+          jobId,
+          scrapeId: jobId
+        });
+      }
+
+      if (costTracking instanceof CostTracking) {
+        costTracking = costTracking.toJSON();
+      }
+
+      creditsToBeBilled = Math.ceil((costTracking?.totalCost ?? 1) * 1800);
+    } else {
+      creditsToBeBilled = 150;
+    }
+  }
+
+  if (document.metadata.numPages !== undefined && document.metadata.numPages > 1) {
+    creditsToBeBilled += creditsPerPDFPage * (document.metadata.numPages - 1);
+  }
+
+  if (document?.metadata?.proxyUsed === "stealth") {
+    creditsToBeBilled += stealthProxyCostBonus;
+  }
+
+  return creditsToBeBilled;
+}
\ No newline at end of file
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 44503732..9624cfda 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -85,6 +85,7 @@ import https from "https";
 import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
 import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
 import { RateLimiterMode } from "../types";
+import { calculateCreditsToBeBilled } from "../lib/scrape-billing";
 import { redisEvictConnection } from "./redis";
 
 configDotenv();
@@ -1384,22 +1385,7 @@ async function processJob(job: Job & { id: string }, token: string) {
     }
 
     if (job.data.is_scrape !== true) {
-      let creditsToBeBilled = 1; // Assuming 1 credit per document
-      if ((job.data.scrapeOptions.extract && job.data.scrapeOptions.formats?.includes("extract")) || (job.data.scrapeOptions.formats?.includes("changeTracking") && job.data.scrapeOptions.changeTrackingOptions?.modes?.includes("json"))) {
-        creditsToBeBilled = 5;
-      }
-
-      if (job.data.scrapeOptions.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.extract?.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
-        if (process.env.USE_DB_AUTHENTICATION === "true") {
-          creditsToBeBilled = Math.ceil((costTracking.toJSON().totalCost ?? 1) * 1800);
-        } else {
-          creditsToBeBilled = 150;
-        }
-      }
-
-      if (doc.metadata?.proxyUsed === "stealth") {
-        creditsToBeBilled += 4;
-      }
+      let creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, doc, job.id, costTracking);
 
       if (
         job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
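
For readers skimming the billing change, here is a short, self-contained TypeScript sketch (not part of the patch) of the rules that calculateCreditsToBeBilled consolidates, including the new per-page PDF charge. The names estimateCredits, ExampleScrapeOptions, ExampleDocumentMetadata, and trackedTotalCost are invented for the illustration, and the FIRE-1 cost-tracking lookup (which the real helper reads from the job record or receives from the queue worker, keyed on USE_DB_AUTHENTICATION) is reduced here to an optional number.

// Simplified model of the options and document metadata the billing rules
// look at. These local types are illustrative, not the real ScrapeOptions /
// Document types from apps/api/src/controllers/v1/types.
interface ExampleScrapeOptions {
  formats?: string[];
  extract?: { agent?: { model?: string } };
  jsonOptions?: { agent?: { model?: string } };
  agent?: { model?: string };
  changeTrackingOptions?: { modes?: string[] };
}

interface ExampleDocumentMetadata {
  numPages?: number;
  proxyUsed?: "basic" | "stealth";
}

const creditsPerPDFPage = 1;
const stealthProxyCostBonus = 4;

// trackedTotalCost stands in for the cost-tracking data the real helper loads
// from the job record (or is handed by the queue worker).
function estimateCredits(
  options: ExampleScrapeOptions,
  metadata: ExampleDocumentMetadata,
  trackedTotalCost?: number,
): number {
  let credits = 1; // base rate: 1 credit per document

  // Extract format (or JSON-mode change tracking) is billed at a flat 5 credits.
  if (
    (options.extract && options.formats?.includes("extract")) ||
    (options.formats?.includes("changeTracking") &&
      options.changeTrackingOptions?.modes?.includes("json"))
  ) {
    credits = 5;
  }

  // FIRE-1 agent scrapes are billed from tracked cost, ceil(totalCost * 1800),
  // falling back to a flat 150 credits when no cost data is available (the
  // real helper keys this branch on USE_DB_AUTHENTICATION instead).
  const usesFire1 = [
    options.agent?.model,
    options.extract?.agent?.model,
    options.jsonOptions?.agent?.model,
  ].some((model) => model?.toLowerCase() === "fire-1");
  if (usesFire1) {
    credits =
      trackedTotalCost !== undefined ? Math.ceil(trackedTotalCost * 1800) : 150;
  }

  // New in this patch: every PDF page after the first adds creditsPerPDFPage.
  if (metadata.numPages !== undefined && metadata.numPages > 1) {
    credits += creditsPerPDFPage * (metadata.numPages - 1);
  }

  // Stealth proxy adds a flat surcharge on top of everything else.
  if (metadata.proxyUsed === "stealth") {
    credits += stealthProxyCostBonus;
  }

  return credits;
}

// Example: a 10-page PDF fetched through the stealth proxy, no extract or
// agent options: 1 (base) + 9 * 1 (extra pages) + 4 (stealth) = 14 credits.
console.log(estimateCredits({}, { numPages: 10, proxyUsed: "stealth" })); // 14

Under the pre-patch logic the same 10-page stealth-proxied PDF would have been billed 1 + 4 = 5 credits; the per-page term is the behavioral change this patch introduces, with the existing rules moved into apps/api/src/lib/scrape-billing.ts so the controller and the queue worker share one implementation.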