Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl, synced 2025-08-14 11:46:00 +08:00
feat(scrapeURL/pdf): bill n credits per page (FIR-1934) (#1553)

* feat(scrapeURL/pdf): bill n credits per page
* Update scrape.ts
* Update queue-worker.ts
* separate billing logic

Co-authored-by: Nicolas <nicolascamara29@gmail.com>

This commit is contained in:
parent 38c96b524f
commit 93655b5c0b
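In short, the commit extracts the per-document credit calculation into a shared helper (apps/api/src/lib/scrape-billing.ts) and adds a surcharge of one credit per PDF page beyond the first, on top of the existing stealth-proxy bonus. A minimal sketch of the resulting rule follows; the function and constant names here are illustrative stand-ins, not the repo's API, and the real helper with its exact base prices appears in the diff below.

```typescript
// Sketch of the pricing rule introduced by this commit; names and the
// `basePrice` parameter are illustrative stand-ins, not the repo's API.
const CREDITS_PER_PDF_PAGE = 1; // mirrors creditsPerPDFPage in scrape-billing.ts
const STEALTH_PROXY_BONUS = 4;  // mirrors stealthProxyCostBonus

function estimateCredits(basePrice: number, numPages?: number, stealthProxy = false): number {
  let credits = basePrice;
  // Multi-page PDFs: every page after the first is billed separately.
  if (numPages !== undefined && numPages > 1) {
    credits += CREDITS_PER_PDF_PAGE * (numPages - 1);
  }
  // Flat surcharge when the stealth proxy was used for the request.
  if (stealthProxy) {
    credits += STEALTH_PROXY_BONUS;
  }
  return credits;
}

// e.g. a 10-page PDF scraped via the stealth proxy at the default base price:
// 1 + 1 * 9 + 4 = 14 credits
console.log(estimateCredits(1, 10, true)); // 14
```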
apps/api/src/controllers/v1/scrape.ts

@@ -13,6 +13,7 @@ import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
 import { getJobPriority } from "../../lib/job-priority";
 import { getScrapeQueue } from "../../services/queue-service";
 import { supabaseGetJobById } from "../../lib/supabase-jobs";
+import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";

 export async function scrapeController(
   req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
@@ -132,33 +133,12 @@ export async function scrapeController(
         0 // TODO: fix
       : 0;

-  let creditsToBeBilled = 1; // Assuming 1 credit per document
   if (earlyReturn) {
     // Don't bill if we're early returning
     return;
   }
-  if ((req.body.extract && req.body.formats?.includes("extract")) || (req.body.formats?.includes("changeTracking") && req.body.changeTrackingOptions?.modes?.includes("json"))) {
-    creditsToBeBilled = 5;
-  }
-
-  if (req.body.agent?.model?.toLowerCase() === "fire-1" || req.body.extract?.agent?.model?.toLowerCase() === "fire-1" || req.body.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
-    if (process.env.USE_DB_AUTHENTICATION === "true") {
-      // @Nick this is a hack pushed at 2AM pls help - mogery
-      const job = await supabaseGetJobById(jobId);
-      if (!job?.cost_tracking) {
-        logger.warn("No cost tracking found for job", {
-          jobId,
-        });
-      }
-      creditsToBeBilled = Math.ceil((job?.cost_tracking?.totalCost ?? 1) * 1800);
-    } else {
-      creditsToBeBilled = 150;
-    }
-  }
-
-  if (doc?.metadata?.proxyUsed === "stealth") {
-    creditsToBeBilled += 4;
-  }
-
+  let creditsToBeBilled = await calculateCreditsToBeBilled(req.body, doc, jobId);
   billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
     (error) => {
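The controller path has no in-memory cost tracker, so it calls the helper without the optional costTracking argument; for FIRE-1 jobs the helper then falls back to fetching cost_tracking from the job row in Supabase (see the new file below). A hedged sketch of the call-site shape, with the catch body abbreviated since it lies outside this hunk:

```typescript
// Sketch of the controller call site after this change; req.body, doc, jobId,
// billTeam and req.auth come from the surrounding scrapeController scope.
const creditsToBeBilled = await calculateCreditsToBeBilled(req.body, doc, jobId);

// Billing stays fire-and-forget: the response is not blocked on billTeam.
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch((error) => {
  // error handling as in the existing catch body (not shown in this hunk)
});
```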
apps/api/src/lib/scrape-billing.ts (new file, 49 lines)

@@ -0,0 +1,49 @@
+import { Document, ScrapeOptions } from "../controllers/v1/types";
+import { supabaseGetJobById } from "./supabase-jobs";
+import { logger } from "./logger";
+import { CostTracking } from "./extract/extraction-service";
+
+const creditsPerPDFPage = 1;
+const stealthProxyCostBonus = 4;
+
+export async function calculateCreditsToBeBilled(options: ScrapeOptions, document: Document, jobId: string, costTracking?: any) {
+  let creditsToBeBilled = 1; // Assuming 1 credit per document
+  if ((options.extract && options.formats?.includes("extract")) || (options.formats?.includes("changeTracking") && options.changeTrackingOptions?.modes?.includes("json"))) {
+    creditsToBeBilled = 5;
+  }
+
+  if (options.agent?.model?.toLowerCase() === "fire-1" || options.extract?.agent?.model?.toLowerCase() === "fire-1" || options.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
+    if (process.env.USE_DB_AUTHENTICATION === "true") {
+      // @Nick this is a hack pushed at 2AM pls help - mogery
+      if (!costTracking) {
+        const job = await supabaseGetJobById(jobId);
+        costTracking = job?.cost_tracking;
+      }
+
+      if (!costTracking) {
+        logger.warn("No cost tracking found for job", {
+          jobId,
+          scrapeId: jobId
+        });
+      }
+
+      if (costTracking instanceof CostTracking) {
+        costTracking = costTracking.toJSON();
+      }
+
+      creditsToBeBilled = Math.ceil((costTracking?.totalCost ?? 1) * 1800);
+    } else {
+      creditsToBeBilled = 150;
+    }
+  }
+
+  if (document.metadata.numPages !== undefined && document.metadata.numPages > 1) {
+    creditsToBeBilled += creditsPerPDFPage * (document.metadata.numPages - 1);
+  }
+
+  if (document?.metadata?.proxyUsed === "stealth") {
+    creditsToBeBilled += stealthProxyCostBonus;
+  }
+
+  return creditsToBeBilled;
+}
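To make the FIRE-1 branch concrete: the default document costs 1 credit, the extract format (or changeTracking in "json" mode) raises the base to 5, and FIRE-1 jobs are priced from the tracked cost when DB authentication is enabled. A small worked example, where the dollar figure is an assumed value rather than one from the repo:

```typescript
// Worked example of the FIRE-1 pricing branch (the tracked cost is assumed
// to be a dollar amount; 0.05 is an illustrative value, not a repo fixture).
const totalCost = 0.05;
const credits = Math.ceil(totalCost * 1800); // 1800 credits per unit of tracked cost
console.log(credits); // 90; without USE_DB_AUTHENTICATION the flat fallback is 150
```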
apps/api/src/services/queue-worker.ts

@@ -85,6 +85,7 @@ import https from "https";
 import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
 import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
 import { RateLimiterMode } from "../types";
+import { calculateCreditsToBeBilled } from "../lib/scrape-billing";
 import { redisEvictConnection } from "./redis";

 configDotenv();
@@ -1384,22 +1385,7 @@ async function processJob(job: Job & { id: string }, token: string) {
     }

     if (job.data.is_scrape !== true) {
-      let creditsToBeBilled = 1; // Assuming 1 credit per document
-      if ((job.data.scrapeOptions.extract && job.data.scrapeOptions.formats?.includes("extract")) || (job.data.scrapeOptions.formats?.includes("changeTracking") && job.data.scrapeOptions.changeTrackingOptions?.modes?.includes("json"))) {
-        creditsToBeBilled = 5;
-      }
-
-      if (job.data.scrapeOptions.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.extract?.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
-        if (process.env.USE_DB_AUTHENTICATION === "true") {
-          creditsToBeBilled = Math.ceil((costTracking.toJSON().totalCost ?? 1) * 1800);
-        } else {
-          creditsToBeBilled = 150;
-        }
-      }
-
-      if (doc.metadata?.proxyUsed === "stealth") {
-        creditsToBeBilled += 4;
-      }
-
+      let creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, doc, job.id, costTracking);
       if (
         job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
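In the worker path a CostTracking instance already exists in memory, so it is passed to the helper directly and the Supabase lookup is skipped. A minimal sketch of that interaction, using a simplified stand-in for the CostTracking class (the real one lives in lib/extract/extraction-service; the shape here is an assumption for illustration):

```typescript
// Simplified stand-in for the CostTracking class used in the worker path
// (the real class lives in apps/api/src/lib/extract/extraction-service).
class CostTrackingSketch {
  constructor(public totalCost: number) {}
  toJSON(): { totalCost: number } {
    return { totalCost: this.totalCost };
  }
}

// Because the worker hands its in-memory tracker straight through, the helper's
// `if (!costTracking) { ...supabaseGetJobById... }` branch never runs; the
// instance is normalized via toJSON() before totalCost is read.
const costTracking = new CostTrackingSketch(0.05); // illustrative value
const { totalCost } = costTracking.toJSON();
console.log(Math.ceil(totalCost * 1800)); // 90 credits under FIRE-1 pricing
```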