Nick: bypass billing

This commit is contained in:
Nicolas 2025-06-02 21:51:46 -03:00
parent bf919ceb82
commit 8967b31465
6 changed files with 87 additions and 54 deletions

View File

@ -54,6 +54,7 @@ export async function scrapeController(
saveScrapeResultToGCS: process.env.GCS_FIRE_ENGINE_BUCKET_NAME ? true : false, saveScrapeResultToGCS: process.env.GCS_FIRE_ENGINE_BUCKET_NAME ? true : false,
unnormalizedSourceURL: preNormalizedBody.url, unnormalizedSourceURL: preNormalizedBody.url,
useCache: req.body.__experimental_cache ? true : false, useCache: req.body.__experimental_cache ? true : false,
bypassBilling: isDirectToBullMQ,
}, },
origin: req.body.origin, origin: req.body.origin,
startTime, startTime,
@ -133,6 +134,7 @@ export async function scrapeController(
} }
} }
return res.status(200).json({ return res.status(200).json({
success: true, success: true,
data: doc, data: doc,

View File

@ -40,23 +40,23 @@ export async function searchAndScrapeSearchResult(
try { try {
const searchResults = await search({ const searchResults = await search({
query, query,
num_results: 5 num_results: 5,
}); });
const documents = await Promise.all( const documents = await Promise.all(
searchResults.map(result => searchResults.map((result) =>
scrapeSearchResult( scrapeSearchResult(
{ {
url: result.url, url: result.url,
title: result.title, title: result.title,
description: result.description description: result.description,
}, },
options, options,
logger, logger,
costTracking, costTracking,
flags flags,
) ),
) ),
); );
return documents; return documents;
@ -171,6 +171,8 @@ export async function searchController(
}; };
const startTime = new Date().getTime(); const startTime = new Date().getTime();
const costTracking = new CostTracking(); const costTracking = new CostTracking();
// A request counts as a search preview only when a preview token is actually
// configured AND the caller presented that exact token. The truthiness guard
// matters: if SEARCH_PREVIEW_TOKEN is unset and the request omits
// __searchPreviewToken, a bare `===` compares undefined to undefined and
// would flag every request as a preview. This flag is what gates the
// billTeam call and is passed to logJob as bypassLogging further down.
const isSearchPreview =
  !!process.env.SEARCH_PREVIEW_TOKEN &&
  process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
try { try {
req.body = searchRequestSchema.parse(req.body); req.body = searchRequestSchema.parse(req.body);
@ -199,7 +201,9 @@ export async function searchController(
}); });
if (req.body.ignoreInvalidURLs) { if (req.body.ignoreInvalidURLs) {
searchResults = searchResults.filter((result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null)); searchResults = searchResults.filter(
(result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null),
);
} }
logger.info("Searching completed", { logger.info("Searching completed", {
@ -226,12 +230,19 @@ export async function searchController(
} else { } else {
logger.info("Scraping search results"); logger.info("Scraping search results");
const scrapePromises = searchResults.map((result) => const scrapePromises = searchResults.map((result) =>
scrapeSearchResult(result, { scrapeSearchResult(
result,
{
teamId: req.auth.team_id, teamId: req.auth.team_id,
origin: req.body.origin, origin: req.body.origin,
timeout: req.body.timeout, timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions, scrapeOptions: req.body.scrapeOptions,
}, logger, costTracking, req.acuc?.flags ?? null, (req.acuc?.price_credits ?? 0) <= 3000), },
logger,
costTracking,
req.acuc?.flags ?? null,
(req.acuc?.price_credits ?? 0) <= 3000,
),
); );
const docs = await Promise.all(scrapePromises); const docs = await Promise.all(scrapePromises);
@ -257,17 +268,23 @@ export async function searchController(
} }
// Bill team once for all successful results // Bill team once for all successful results
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.reduce((a,x) => { if (!isSearchPreview) {
billTeam(
req.auth.team_id,
req.acuc?.sub_id,
responseData.data.reduce((a, x) => {
if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) { if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
return a + x.metadata.numPages; return a + x.metadata.numPages;
} else { } else {
return a + 1; return a + 1;
} }
}, 0)).catch((error) => { }, 0),
).catch((error) => {
logger.error( logger.error(
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`, `Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
); );
}); });
}
const endTime = new Date().getTime(); const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000; const timeTakenInSeconds = (endTime - startTime) / 1000;
@ -277,7 +294,8 @@ export async function searchController(
time_taken: timeTakenInSeconds, time_taken: timeTakenInSeconds,
}); });
logJob({ logJob(
{
job_id: jobId, job_id: jobId,
success: true, success: true,
num_docs: responseData.data.length, num_docs: responseData.data.length,
@ -289,10 +307,12 @@ export async function searchController(
scrapeOptions: req.body.scrapeOptions, scrapeOptions: req.body.scrapeOptions,
origin: req.body.origin, origin: req.body.origin,
cost_tracking: costTracking, cost_tracking: costTracking,
}); },
false,
isSearchPreview,
);
return res.status(200).json(responseData); return res.status(200).json(responseData);
} catch (error) { } catch (error) {
if ( if (
error instanceof Error && error instanceof Error &&

View File

@ -1169,6 +1169,7 @@ export const searchRequestSchema = z
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000), timeout: z.number().int().positive().finite().safe().default(60000),
ignoreInvalidURLs: z.boolean().optional().default(false), ignoreInvalidURLs: z.boolean().optional().default(false),
__searchPreviewToken: z.string().optional(),
scrapeOptions: baseScrapeOptions scrapeOptions: baseScrapeOptions
.extend({ .extend({
formats: z formats: z

View File

@ -189,6 +189,7 @@ export type InternalOptions = {
unnormalizedSourceURL?: string; unnormalizedSourceURL?: string;
saveScrapeResultToGCS?: boolean; // Passed along to fire-engine saveScrapeResultToGCS?: boolean; // Passed along to fire-engine
bypassBilling?: boolean;
}; };
export type EngineResultsTracker = { export type EngineResultsTracker = {

View File

@ -21,13 +21,14 @@ function cleanOfNull<T>(x: T): T {
} }
} }
export async function logJob(job: FirecrawlJob, force: boolean = false) { export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLogging: boolean = false) {
try { try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true"; const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
if (!useDbAuthentication) { if (!useDbAuthentication) {
return; return;
} }
// Redact any pages that have an authorization header // Redact any pages that have an authorization header
// actually, Don't. we use the db to retrieve results now. this breaks authed crawls - mogery // actually, Don't. we use the db to retrieve results now. this breaks authed crawls - mogery
// if ( // if (
@ -70,6 +71,10 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
await saveJobToGCS(job); await saveJobToGCS(job);
} }
if (bypassLogging) {
return;
}
if (force) { if (force) {
let i = 0, let i = 0,
done = false; done = false;

View File

@ -319,7 +319,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
scrapeOptions: sc.scrapeOptions, scrapeOptions: sc.scrapeOptions,
crawlerOptions: sc.crawlerOptions, crawlerOptions: sc.crawlerOptions,
origin: job.data.origin, origin: job.data.origin,
}); }, false, job.data.internalOptions?.bypassBilling ?? false);
logger.info("Logged crawl!"); logger.info("Logged crawl!");
const data = { const data = {
@ -371,8 +371,10 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
origin: job.data.origin, origin: job.data.origin,
}, },
true, true,
job.data.internalOptions?.bypassBilling ?? false,
); );
// v1 web hooks, call when done with no data, but with event completed // v1 web hooks, call when done with no data, but with event completed
if (job.data.v1 && job.data.webhook) { if (job.data.v1 && job.data.webhook) {
callWebhook( callWebhook(
@ -1048,7 +1050,7 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
async function billScrapeJob(job: Job & { id: string }, document: Document, logger: Logger, costTracking?: CostTracking) { async function billScrapeJob(job: Job & { id: string }, document: Document, logger: Logger, costTracking?: CostTracking) {
let creditsToBeBilled: number | null = null; let creditsToBeBilled: number | null = null;
if (job.data.is_scrape !== true) { if (job.data.is_scrape !== true && !job.data.internalOptions?.bypassBilling) {
creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, document, job.id, costTracking); creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, document, job.id, costTracking);
if ( if (
@ -1378,6 +1380,7 @@ async function processJob(job: Job & { id: string }, token: string) {
credits_billed, credits_billed,
}, },
true, true,
job.data.internalOptions?.bypassBilling ?? false,
); );
if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) { if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@ -1424,7 +1427,7 @@ async function processJob(job: Job & { id: string }, token: string) {
cost_tracking: costTracking, cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages, pdf_num_pages: doc.metadata.numPages,
credits_billed, credits_billed,
}); }, false, job.data.internalOptions?.bypassBilling ?? false);
} }
logger.info(`🐂 Job done ${job.id}`); logger.info(`🐂 Job done ${job.id}`);
@ -1523,6 +1526,7 @@ async function processJob(job: Job & { id: string }, token: string) {
cost_tracking: costTracking, cost_tracking: costTracking,
}, },
true, true,
job.data.internalOptions?.bypassBilling ?? false,
); );
return data; return data;
} }