Nick: bypass billing
commit 8967b31465
parent bf919ceb82
@@ -54,6 +54,7 @@ export async function scrapeController(
       saveScrapeResultToGCS: process.env.GCS_FIRE_ENGINE_BUCKET_NAME ? true : false,
       unnormalizedSourceURL: preNormalizedBody.url,
       useCache: req.body.__experimental_cache ? true : false,
+      bypassBilling: isDirectToBullMQ,
     },
     origin: req.body.origin,
     startTime,
@@ -133,6 +134,7 @@ export async function scrapeController(
     }
   }

   return res.status(200).json({
     success: true,
     data: doc,
@@ -40,24 +40,24 @@ export async function searchAndScrapeSearchResult(
   try {
     const searchResults = await search({
       query,
-      num_results: 5
-    });
+      num_results: 5,
+    });

-    const documents = await Promise.all(
-      searchResults.map(result =>
-        scrapeSearchResult(
-          {
-            url: result.url,
-            title: result.title,
-            description: result.description
-          },
-          options,
-          logger,
-          costTracking,
-          flags
-        )
-      )
-    );
+    const documents = await Promise.all(
+      searchResults.map((result) =>
+        scrapeSearchResult(
+          {
+            url: result.url,
+            title: result.title,
+            description: result.description,
+          },
+          options,
+          logger,
+          costTracking,
+          flags,
+        ),
+      ),
+    );

     return documents;
   } catch (error) {
@@ -112,7 +112,7 @@ async function scrapeSearchResult(
   );

   const doc: Document = await waitForJob(jobId, options.timeout);

   logger.info("Scrape job completed", {
     scrapeId: jobId,
     url: searchResult.url,
@@ -171,6 +171,8 @@ export async function searchController(
   };
   const startTime = new Date().getTime();
   const costTracking = new CostTracking();
+  const isSearchPreview =
+    process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;

   try {
     req.body = searchRequestSchema.parse(req.body);
@@ -199,7 +201,9 @@ export async function searchController(
     });

     if (req.body.ignoreInvalidURLs) {
-      searchResults = searchResults.filter((result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null));
+      searchResults = searchResults.filter(
+        (result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null),
+      );
     }

     logger.info("Searching completed", {
@@ -226,12 +230,19 @@ export async function searchController(
     } else {
       logger.info("Scraping search results");
       const scrapePromises = searchResults.map((result) =>
-        scrapeSearchResult(result, {
-          teamId: req.auth.team_id,
-          origin: req.body.origin,
-          timeout: req.body.timeout,
-          scrapeOptions: req.body.scrapeOptions,
-        }, logger, costTracking, req.acuc?.flags ?? null, (req.acuc?.price_credits ?? 0) <= 3000),
+        scrapeSearchResult(
+          result,
+          {
+            teamId: req.auth.team_id,
+            origin: req.body.origin,
+            timeout: req.body.timeout,
+            scrapeOptions: req.body.scrapeOptions,
+          },
+          logger,
+          costTracking,
+          req.acuc?.flags ?? null,
+          (req.acuc?.price_credits ?? 0) <= 3000,
+        ),
       );

       const docs = await Promise.all(scrapePromises);
@@ -257,17 +268,23 @@ export async function searchController(
       }

       // Bill team once for all successful results
-      billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.reduce((a,x) => {
-        if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
-          return a + x.metadata.numPages;
-        } else {
-          return a + 1;
-        }
-      }, 0)).catch((error) => {
-        logger.error(
-          `Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
-        );
-      });
+      if (!isSearchPreview) {
+        billTeam(
+          req.auth.team_id,
+          req.acuc?.sub_id,
+          responseData.data.reduce((a, x) => {
+            if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
+              return a + x.metadata.numPages;
+            } else {
+              return a + 1;
+            }
+          }, 0),
+        ).catch((error) => {
+          logger.error(
+            `Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
+          );
+        });
+      }

       const endTime = new Date().getTime();
       const timeTakenInSeconds = (endTime - startTime) / 1000;
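For context, the search billing path now only charges when the request is not a preview. Below is a minimal standalone sketch of that pattern; the credit-count rule and the preview guard are taken from the hunk above, but the helper functions, the stripped-down Document type, and console.error are this sketch's own stand-ins, not the repo's code.

// Stand-in type: only the field the credit count needs.
type Doc = { metadata?: { numPages?: number } };

// One credit per document, or one per page when numPages is known (same rule as the reduce above).
function creditsForDocs(docs: Doc[]): number {
  return docs.reduce((a, x) => {
    if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
      return a + x.metadata.numPages;
    }
    return a + 1;
  }, 0);
}

// Billing is skipped entirely for search-preview requests; like the controller,
// the call is fire-and-forget and failures are only logged.
function maybeBill(
  isSearchPreview: boolean,
  billTeam: (credits: number) => Promise<void>,
  docs: Doc[],
): void {
  if (isSearchPreview) return;
  billTeam(creditsForDocs(docs)).catch((error) => {
    console.error(`Failed to bill: ${error}`);
  });
}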
@@ -277,22 +294,25 @@ export async function searchController(
       time_taken: timeTakenInSeconds,
     });

-    logJob({
-      job_id: jobId,
-      success: true,
-      num_docs: responseData.data.length,
-      docs: responseData.data,
-      time_taken: timeTakenInSeconds,
-      team_id: req.auth.team_id,
-      mode: "search",
-      url: req.body.query,
-      scrapeOptions: req.body.scrapeOptions,
-      origin: req.body.origin,
-      cost_tracking: costTracking,
-    });
+    logJob(
+      {
+        job_id: jobId,
+        success: true,
+        num_docs: responseData.data.length,
+        docs: responseData.data,
+        time_taken: timeTakenInSeconds,
+        team_id: req.auth.team_id,
+        mode: "search",
+        url: req.body.query,
+        scrapeOptions: req.body.scrapeOptions,
+        origin: req.body.origin,
+        cost_tracking: costTracking,
+      },
+      false,
+      isSearchPreview,
+    );

     return res.status(200).json(responseData);
   } catch (error) {
     if (
       error instanceof Error &&
@@ -1169,6 +1169,7 @@ export const searchRequestSchema = z
     origin: z.string().optional().default("api"),
     timeout: z.number().int().positive().finite().safe().default(60000),
     ignoreInvalidURLs: z.boolean().optional().default(false),
+    __searchPreviewToken: z.string().optional(),
     scrapeOptions: baseScrapeOptions
      .extend({
        formats: z
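The optional __searchPreviewToken field added above is what the search controller compares against process.env.SEARCH_PREVIEW_TOKEN to compute isSearchPreview. A minimal sketch of that check in isolation; the zod field mirrors the diff, but the standalone schema and wrapper function are illustrative, and the explicit undefined guard is this sketch's addition (the controller's inline comparison assumes the env var is configured).

import { z } from "zod";

// Illustrative subset of searchRequestSchema: only the preview-token field.
const previewFields = z.object({
  __searchPreviewToken: z.string().optional(),
});

// Preview mode is on only when the env token is set and the request echoes it.
function isSearchPreview(body: unknown): boolean {
  const parsed = previewFields.parse(body);
  return (
    process.env.SEARCH_PREVIEW_TOKEN !== undefined &&
    process.env.SEARCH_PREVIEW_TOKEN === parsed.__searchPreviewToken
  );
}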
@@ -189,6 +189,7 @@ export type InternalOptions = {
   unnormalizedSourceURL?: string;

   saveScrapeResultToGCS?: boolean; // Passed along to fire-engine
+  bypassBilling?: boolean;
 };

 export type EngineResultsTracker = {
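The new bypassBilling flag rides along in the job's internalOptions, as set in the scrape controller hunk above (bypassBilling: isDirectToBullMQ). A minimal sketch of that flow; buildJobPayload is a hypothetical helper for illustration, not the repo's API, and the !! shorthand stands in for the controller's ternary.

// Hypothetical sketch: the payload shape is trimmed down. The point is that
// bypassBilling travels with internalOptions so the worker can read it later.
type InternalOptions = {
  saveScrapeResultToGCS?: boolean;
  bypassBilling?: boolean;
};

function buildJobPayload(isDirectToBullMQ: boolean) {
  const internalOptions: InternalOptions = {
    saveScrapeResultToGCS: !!process.env.GCS_FIRE_ENGINE_BUCKET_NAME,
    bypassBilling: isDirectToBullMQ,
  };
  return { internalOptions };
}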
@@ -21,12 +21,13 @@ function cleanOfNull<T>(x: T): T {
   }
 }

-export async function logJob(job: FirecrawlJob, force: boolean = false) {
+export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLogging: boolean = false) {
   try {
     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
     if (!useDbAuthentication) {
       return;
     }

     // Redact any pages that have an authorization header
     // actually, Don't. we use the db to retrieve results now. this breaks authed crawls - mogery
@@ -70,6 +71,10 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
       await saveJobToGCS(job);
     }

+    if (bypassLogging) {
+      return;
+    }
+
     if (force) {
       let i = 0,
         done = false;
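Note the ordering in the hunk above: the job is still saved to GCS before the new bypassLogging early return, so only the database logging is skipped. A minimal sketch of that control flow under stated assumptions; the stubs, the insertIntoDb name, and the elided GCS condition are illustrative, only the ordering mirrors the diff.

type FirecrawlJob = { job_id: string };

async function saveJobToGCS(job: FirecrawlJob): Promise<void> {
  /* stub */
}
async function insertIntoDb(job: FirecrawlJob, force: boolean): Promise<void> {
  /* stub */
}

async function logJobSketch(
  job: FirecrawlJob,
  force: boolean = false,
  bypassLogging: boolean = false,
): Promise<void> {
  // (GCS condition elided in this sketch; the diff only shows the call)
  await saveJobToGCS(job);

  if (bypassLogging) {
    return; // added by this commit: skip the DB write, keep the GCS copy
  }

  await insertIntoDb(job, force);
}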
@@ -319,7 +319,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
       scrapeOptions: sc.scrapeOptions,
       crawlerOptions: sc.crawlerOptions,
       origin: job.data.origin,
-    });
+    }, false, job.data.internalOptions?.bypassBilling ?? false);
     logger.info("Logged crawl!");

     const data = {
@@ -371,8 +371,10 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
         origin: job.data.origin,
       },
       true,
+      job.data.internalOptions?.bypassBilling ?? false,
     );

     // v1 web hooks, call when done with no data, but with event completed
     if (job.data.v1 && job.data.webhook) {
       callWebhook(
@@ -1048,7 +1050,7 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
 async function billScrapeJob(job: Job & { id: string }, document: Document, logger: Logger, costTracking?: CostTracking) {
   let creditsToBeBilled: number | null = null;

-  if (job.data.is_scrape !== true) {
+  if (job.data.is_scrape !== true && !job.data.internalOptions?.bypassBilling) {
     creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, document, job.id, costTracking);

     if (
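Together with the controller change, this hunk is the other half of the feature: jobs enqueued with internalOptions.bypassBilling are skipped when credits are calculated in the worker. A condensed sketch of the guard; the data shape is simplified and the credit calculation is stubbed, only the condition mirrors the diff.

// Simplified job data; the real job comes from BullMQ.
type ScrapeJobData = {
  is_scrape?: boolean;
  internalOptions?: { bypassBilling?: boolean };
};

async function calculateCreditsToBeBilledStub(): Promise<number> {
  return 1; // stand-in for the real calculation
}

async function billScrapeJobSketch(data: ScrapeJobData): Promise<number | null> {
  let creditsToBeBilled: number | null = null;

  // Same condition as the diff: credits are computed only when is_scrape !== true
  // and bypassBilling is not set on the job's internalOptions.
  if (data.is_scrape !== true && !data.internalOptions?.bypassBilling) {
    creditsToBeBilled = await calculateCreditsToBeBilledStub();
  }

  return creditsToBeBilled;
}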
@@ -1378,6 +1380,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         credits_billed,
       },
       true,
+      job.data.internalOptions?.bypassBilling ?? false,
     );

     if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
@@ -1424,7 +1427,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         cost_tracking: costTracking,
         pdf_num_pages: doc.metadata.numPages,
         credits_billed,
-      });
+      }, false, job.data.internalOptions?.bypassBilling ?? false);
     }

     logger.info(`🐂 Job done ${job.id}`);
@@ -1523,6 +1526,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         cost_tracking: costTracking,
       },
       true,
+      job.data.internalOptions?.bypassBilling ?? false,
     );
     return data;
   }