feat(scrapeURL/pdf): bill n credits per page (FIR-1934) (#1553)

* feat(scrapeURL/pdf): bill n credits per page

* Update scrape.ts

* Update queue-worker.ts

* separate billing logic

---------

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
Gergő Móricz 2025-05-29 16:01:08 +02:00 committed by GitHub
parent 38c96b524f
commit 93655b5c0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 53 additions and 38 deletions

View File

@ -13,6 +13,7 @@ import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { getJobPriority } from "../../lib/job-priority";
import { getScrapeQueue } from "../../services/queue-service";
import { supabaseGetJobById } from "../../lib/supabase-jobs";
import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";
export async function scrapeController(
req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
@ -132,33 +133,12 @@ export async function scrapeController(
0 // TODO: fix
: 0;
let creditsToBeBilled = 1; // Assuming 1 credit per document
if (earlyReturn) {
// Don't bill if we're early returning
return;
}
if ((req.body.extract && req.body.formats?.includes("extract")) || (req.body.formats?.includes("changeTracking") && req.body.changeTrackingOptions?.modes?.includes("json"))) {
creditsToBeBilled = 5;
}
if (req.body.agent?.model?.toLowerCase() === "fire-1" || req.body.extract?.agent?.model?.toLowerCase() === "fire-1" || req.body.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
if (process.env.USE_DB_AUTHENTICATION === "true") {
// @Nick this is a hack pushed at 2AM pls help - mogery
const job = await supabaseGetJobById(jobId);
if (!job?.cost_tracking) {
logger.warn("No cost tracking found for job", {
jobId,
});
}
creditsToBeBilled = Math.ceil((job?.cost_tracking?.totalCost ?? 1) * 1800);
} else {
creditsToBeBilled = 150;
}
}
if (doc?.metadata?.proxyUsed === "stealth") {
creditsToBeBilled += 4;
}
let creditsToBeBilled = await calculateCreditsToBeBilled(req.body, doc, jobId);
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
(error) => {

View File

@ -0,0 +1,49 @@
import { Document, ScrapeOptions } from "../controllers/v1/types";
import { supabaseGetJobById } from "./supabase-jobs";
import { logger } from "./logger";
import { CostTracking } from "./extract/extraction-service";
// Credits added for every PDF page after the first one.
const creditsPerPDFPage = 1;
// Flat surcharge added when the document was fetched through the stealth proxy.
const stealthProxyCostBonus = 4;

/**
 * Computes how many credits a finished scrape should be billed.
 *
 * Rules, in the order they are applied:
 * 1. Base price is 1 credit per document.
 * 2. The price becomes 5 credits when `extract` options are present AND the
 *    `extract` format is requested, or when the `changeTracking` format is
 *    requested with a `json` change-tracking mode.
 * 3. When a FIRE-1 agent is selected (on `agent`, `extract.agent` or
 *    `jsonOptions.agent`, case-insensitive) the price is replaced by
 *    `ceil(totalCost * 1800)` if `USE_DB_AUTHENTICATION === "true"`,
 *    otherwise by a flat 150 credits.
 * 4. `creditsPerPDFPage` is added for every page after the first when
 *    `metadata.numPages` is set.
 * 5. `stealthProxyCostBonus` is added when `metadata.proxyUsed` is `"stealth"`.
 *
 * @param options - Scrape options of the request/job being billed.
 * @param document - The scraped document. A missing document or missing
 *   `metadata` is tolerated and simply contributes no page/proxy surcharge.
 * @param jobId - Job id; used to look the job up via `supabaseGetJobById`
 *   when no cost tracking is supplied, and as log context.
 * @param costTracking - Optional cost tracking, either a `CostTracking`
 *   instance or an object exposing `totalCost`.
 * @returns The number of credits to bill.
 */
export async function calculateCreditsToBeBilled(options: ScrapeOptions, document: Document, jobId: string, costTracking?: any): Promise<number> {
  let creditsToBeBilled = 1; // Assuming 1 credit per document

  const usesExtract = Boolean(options.extract && options.formats?.includes("extract"));
  const usesJsonChangeTracking = Boolean(
    options.formats?.includes("changeTracking") &&
      options.changeTrackingOptions?.modes?.includes("json"),
  );
  if (usesExtract || usesJsonChangeTracking) {
    creditsToBeBilled = 5;
  }

  const usesFire1 = [
    options.agent,
    options.extract?.agent,
    options.jsonOptions?.agent,
  ].some((agent) => agent?.model?.toLowerCase() === "fire-1");

  if (usesFire1) {
    if (process.env.USE_DB_AUTHENTICATION === "true") {
      // @Nick this is a hack pushed at 2AM pls help - mogery
      // Work on a local copy so the caller's argument is never reassigned.
      let tracking = costTracking;
      if (!tracking) {
        // Caller did not pass cost tracking: fall back to the stored job.
        const job = await supabaseGetJobById(jobId);
        tracking = job?.cost_tracking;
      }

      if (!tracking) {
        logger.warn("No cost tracking found for job", {
          jobId,
          scrapeId: jobId
        });
      }

      if (tracking instanceof CostTracking) {
        tracking = tracking.toJSON();
      }

      // NOTE(review): 1800 looks like a cost-to-credits conversion factor and
      // the `?? 1` fallback bills 1800 credits when no cost is known — confirm.
      creditsToBeBilled = Math.ceil((tracking?.totalCost ?? 1) * 1800);
    } else {
      creditsToBeBilled = 150;
    }
  }

  // Optional chaining here mirrors the proxy check below: the call sites this
  // helper replaced tolerated documents without metadata, so a missing
  // document/metadata must not turn billing into a TypeError.
  const numPages = document?.metadata?.numPages;
  if (numPages !== undefined && numPages > 1) {
    // The first page is covered by the price computed above.
    creditsToBeBilled += creditsPerPDFPage * (numPages - 1);
  }

  if (document?.metadata?.proxyUsed === "stealth") {
    creditsToBeBilled += stealthProxyCostBonus;
  }

  return creditsToBeBilled;
}

View File

@ -85,6 +85,7 @@ import https from "https";
import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
import { RateLimiterMode } from "../types";
import { calculateCreditsToBeBilled } from "../lib/scrape-billing";
import { redisEvictConnection } from "./redis";
configDotenv();
@ -1384,22 +1385,7 @@ async function processJob(job: Job & { id: string }, token: string) {
}
if (job.data.is_scrape !== true) {
let creditsToBeBilled = 1; // Assuming 1 credit per document
if ((job.data.scrapeOptions.extract && job.data.scrapeOptions.formats?.includes("extract")) || (job.data.scrapeOptions.formats?.includes("changeTracking") && job.data.scrapeOptions.changeTrackingOptions?.modes?.includes("json"))) {
creditsToBeBilled = 5;
}
if (job.data.scrapeOptions.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.extract?.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
if (process.env.USE_DB_AUTHENTICATION === "true") {
creditsToBeBilled = Math.ceil((costTracking.toJSON().totalCost ?? 1) * 1800);
} else {
creditsToBeBilled = 150;
}
}
if (doc.metadata?.proxyUsed === "stealth") {
creditsToBeBilled += 4;
}
let creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, doc, job.id, costTracking);
if (
job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&