Nick: bypass billing

Nicolas 2025-06-02 21:51:46 -03:00
parent bf919ceb82
commit 8967b31465
6 changed files with 87 additions and 54 deletions
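Read together, the hunks below add one internal flag and two escape hatches: scrape jobs can carry internalOptions.bypassBilling (set from isDirectToBullMQ in the scrape controller and honored by billScrapeJob and the logJob calls in the queue worker), and search requests carrying a __searchPreviewToken that matches SEARCH_PREVIEW_TOKEN skip billTeam and database logging. A minimal sketch of the worker-side guard, reconstructed from the diff (the helper and type names here are illustrative, not code added by this commit):

// Mirrors the new condition in billScrapeJob: credits are only calculated when the job
// is not an internal scrape and billing has not been explicitly bypassed.
type InternalOptionsSketch = { bypassBilling?: boolean };

function shouldBillJob(isScrape: boolean | undefined, internalOptions?: InternalOptionsSketch): boolean {
  return isScrape !== true && !internalOptions?.bypassBilling;
}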

View File

@@ -54,6 +54,7 @@ export async function scrapeController(
saveScrapeResultToGCS: process.env.GCS_FIRE_ENGINE_BUCKET_NAME ? true : false,
unnormalizedSourceURL: preNormalizedBody.url,
useCache: req.body.__experimental_cache ? true : false,
+bypassBilling: isDirectToBullMQ,
},
origin: req.body.origin,
startTime,
@@ -133,6 +134,7 @@ export async function scrapeController(
}
}
return res.status(200).json({
success: true,
data: doc,

View File

@@ -40,24 +40,24 @@ export async function searchAndScrapeSearchResult(
try {
const searchResults = await search({
query,
-num_results: 5
-});
+num_results: 5,
+});
-const documents = await Promise.all(
-searchResults.map(result =>
-scrapeSearchResult(
-{
-url: result.url,
-title: result.title,
-description: result.description
-},
-options,
-logger,
-costTracking,
-flags
-)
-)
-);
+const documents = await Promise.all(
+searchResults.map((result) =>
+scrapeSearchResult(
+{
+url: result.url,
+title: result.title,
+description: result.description,
+},
+options,
+logger,
+costTracking,
+flags,
+),
+),
+);
return documents;
} catch (error) {
@@ -112,7 +112,7 @@ async function scrapeSearchResult(
);
const doc: Document = await waitForJob(jobId, options.timeout);
logger.info("Scrape job completed", {
scrapeId: jobId,
url: searchResult.url,
@@ -171,6 +171,8 @@ export async function searchController(
};
const startTime = new Date().getTime();
const costTracking = new CostTracking();
+const isSearchPreview =
+process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
try {
req.body = searchRequestSchema.parse(req.body);
@@ -199,7 +201,9 @@ export async function searchController(
});
if (req.body.ignoreInvalidURLs) {
-searchResults = searchResults.filter((result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null));
+searchResults = searchResults.filter(
+(result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null),
+);
}
logger.info("Searching completed", {
@@ -226,12 +230,19 @@ export async function searchController(
} else {
logger.info("Scraping search results");
const scrapePromises = searchResults.map((result) =>
-scrapeSearchResult(result, {
-teamId: req.auth.team_id,
-origin: req.body.origin,
-timeout: req.body.timeout,
-scrapeOptions: req.body.scrapeOptions,
-}, logger, costTracking, req.acuc?.flags ?? null, (req.acuc?.price_credits ?? 0) <= 3000),
+scrapeSearchResult(
+result,
+{
+teamId: req.auth.team_id,
+origin: req.body.origin,
+timeout: req.body.timeout,
+scrapeOptions: req.body.scrapeOptions,
+},
+logger,
+costTracking,
+req.acuc?.flags ?? null,
+(req.acuc?.price_credits ?? 0) <= 3000,
+),
);
const docs = await Promise.all(scrapePromises);
@@ -257,17 +268,23 @@ export async function searchController(
}
// Bill team once for all successful results
-billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.reduce((a,x) => {
-if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
-return a + x.metadata.numPages;
-} else {
-return a + 1;
-}
-}, 0)).catch((error) => {
-logger.error(
-`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
-);
-});
+if (!isSearchPreview) {
+billTeam(
+req.auth.team_id,
+req.acuc?.sub_id,
+responseData.data.reduce((a, x) => {
+if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
+return a + x.metadata.numPages;
+} else {
+return a + 1;
+}
+}, 0),
+).catch((error) => {
+logger.error(
+`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
+);
+});
+}
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
@@ -277,22 +294,25 @@ export async function searchController(
time_taken: timeTakenInSeconds,
});
-logJob({
-job_id: jobId,
-success: true,
-num_docs: responseData.data.length,
-docs: responseData.data,
-time_taken: timeTakenInSeconds,
-team_id: req.auth.team_id,
-mode: "search",
-url: req.body.query,
-scrapeOptions: req.body.scrapeOptions,
-origin: req.body.origin,
-cost_tracking: costTracking,
-});
+logJob(
+{
+job_id: jobId,
+success: true,
+num_docs: responseData.data.length,
+docs: responseData.data,
+time_taken: timeTakenInSeconds,
+team_id: req.auth.team_id,
+mode: "search",
+url: req.body.query,
+scrapeOptions: req.body.scrapeOptions,
+origin: req.body.origin,
+cost_tracking: costTracking,
+},
+false,
+isSearchPreview,
+);
return res.status(200).json(responseData);
} catch (error) {
if (
error instanceof Error &&
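For context, a hedged usage sketch of the new preview path; the endpoint URL, auth header, and client code are assumptions for illustration, and only the __searchPreviewToken field and the SEARCH_PREVIEW_TOKEN comparison come from this diff. When the token matches, the controller still runs the search and scrapes, but billTeam is skipped and logJob is called with bypassLogging set:

// Assumes the controller above is mounted at /v1/search, SEARCH_PREVIEW_TOKEN is configured
// on the server, and a Node 18+ runtime provides the global fetch.
const response = await fetch("https://api.firecrawl.dev/v1/search", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    query: "example query",
    __searchPreviewToken: process.env.SEARCH_PREVIEW_TOKEN, // must equal the server-side value
  }),
});
const body = await response.json();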

View File

@@ -1169,6 +1169,7 @@ export const searchRequestSchema = z
origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000),
ignoreInvalidURLs: z.boolean().optional().default(false),
+__searchPreviewToken: z.string().optional(),
scrapeOptions: baseScrapeOptions
.extend({
formats: z

View File

@@ -189,6 +189,7 @@ export type InternalOptions = {
unnormalizedSourceURL?: string;
saveScrapeResultToGCS?: boolean; // Passed along to fire-engine
+bypassBilling?: boolean;
};
export type EngineResultsTracker = {

View File

@@ -21,12 +21,13 @@ function cleanOfNull<T>(x: T): T {
}
}
-export async function logJob(job: FirecrawlJob, force: boolean = false) {
+export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLogging: boolean = false) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
if (!useDbAuthentication) {
return;
}
// Redact any pages that have an authorization header
// actually, Don't. we use the db to retrieve results now. this breaks authed crawls - mogery
@@ -70,6 +71,10 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
await saveJobToGCS(job);
}
+if (bypassLogging) {
+return;
+}
if (force) {
let i = 0,
done = false;
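A short usage sketch of the widened signature; these call sites are illustrative rather than taken from this diff. The new third parameter defaults to false, so existing callers behave as before, and because the saveJobToGCS call above runs before the new early return, bypassed jobs still reach GCS when that bucket is configured:

// job is any FirecrawlJob value already available at the call site.
await logJob(job);              // as before: written to the database
await logJob(job, true);        // as before: forced write, using the retry loop below
await logJob(job, false, true); // new: the GCS save above still runs, the database insert is skipped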

View File

@@ -319,7 +319,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
scrapeOptions: sc.scrapeOptions,
crawlerOptions: sc.crawlerOptions,
origin: job.data.origin,
-});
+}, false, job.data.internalOptions?.bypassBilling ?? false);
logger.info("Logged crawl!");
const data = {
@@ -371,8 +371,10 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
origin: job.data.origin,
},
true,
+job.data.internalOptions?.bypassBilling ?? false,
);
// v1 web hooks, call when done with no data, but with event completed
if (job.data.v1 && job.data.webhook) {
callWebhook(
@@ -1048,7 +1050,7 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
async function billScrapeJob(job: Job & { id: string }, document: Document, logger: Logger, costTracking?: CostTracking) {
let creditsToBeBilled: number | null = null;
-if (job.data.is_scrape !== true) {
+if (job.data.is_scrape !== true && !job.data.internalOptions?.bypassBilling) {
creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, document, job.id, costTracking);
if (
@@ -1378,6 +1380,7 @@ async function processJob(job: Job & { id: string }, token: string) {
credits_billed,
},
true,
+job.data.internalOptions?.bypassBilling ?? false,
);
if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@@ -1424,7 +1427,7 @@ async function processJob(job: Job & { id: string }, token: string) {
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
credits_billed,
-});
+}, false, job.data.internalOptions?.bypassBilling ?? false);
}
logger.info(`🐂 Job done ${job.id}`);
@@ -1523,6 +1526,7 @@ async function processJob(job: Job & { id: string }, token: string) {
cost_tracking: costTracking,
},
true,
+job.data.internalOptions?.bypassBilling ?? false,
);
return data;
}