feat(search): ignore concurrency limit for search (FIR-2187) (#1617)

* feat(search): ignore concurrency limit for search (temp)

* feat(search): only for low tier users for good DX
Gergő Móricz · 2025-06-02 22:07:44 +02:00 · committed by GitHub
parent 1396451d31
commit 98ceda9bd5
2 changed files with 7 additions and 3 deletions
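
Taken together, the two files thread one boolean from the HTTP layer down to the queue: searchController computes directToBullMQ from the team's price_credits, scrapeSearchResult forwards it alongside each scrape job, and addScrapeJobRaw uses it to skip the concurrency-limited branch so the job is handed to BullMQ immediately.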

File 1 of 2: search controller (scrapeSearchResult, searchController)

@@ -76,6 +76,7 @@ async function scrapeSearchResult(
   logger: Logger,
   costTracking: CostTracking,
   flags: TeamFlags,
+  directToBullMQ: boolean = false,
 ): Promise<Document> {
   const jobId = uuidv4();
   const jobPriority = await getJobPriority({
@@ -102,11 +103,11 @@ async function scrapeSearchResult(
       internalOptions: { teamId: options.teamId, useCache: true },
       origin: options.origin,
       is_scrape: true,
     },
     {},
     jobId,
     jobPriority,
+    directToBullMQ,
   );
   const doc: Document = await waitForJob(jobId, options.timeout);
@@ -229,7 +230,7 @@ export async function searchController(
       origin: req.body.origin,
       timeout: req.body.timeout,
       scrapeOptions: req.body.scrapeOptions,
-    }, logger, costTracking, req.acuc?.flags ?? null),
+    }, logger, costTracking, req.acuc?.flags ?? null, (req.acuc?.price_credits ?? 0) <= 3000),
   );
   const docs = await Promise.all(scrapePromises);
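
The controller hunk is where the tier gate lives. Below is a standalone sketch of that predicate; AcucLike and shouldGoDirectToBullMQ are assumed names (the diff only shows the inline expression and the property access):

```ts
// Stand-in shape for req.acuc; only price_credits matters here.
// (AcucLike is an assumed name, not a type from this commit.)
interface AcucLike {
  price_credits?: number;
}

// Teams whose plan is worth at most 3000 price credits send their search
// scrapes straight to BullMQ. A missing chunk coalesces to 0 credits, so
// it also takes the direct path.
function shouldGoDirectToBullMQ(acuc: AcucLike | null | undefined): boolean {
  return (acuc?.price_credits ?? 0) <= 3000;
}

console.log(shouldGoDirectToBullMQ(null));                     // true
console.log(shouldGoDirectToBullMQ({ price_credits: 3000 }));  // true (inclusive boundary)
console.log(shouldGoDirectToBullMQ({ price_credits: 50000 })); // false
```

Note the boundary matches the <= in the hunk: a team at exactly 3000 credits still bypasses the queue.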

File 2 of 2: scrape job queue (addScrapeJobRaw, addScrapeJob)

@@ -97,6 +97,7 @@ async function addScrapeJobRaw(
   options: any,
   jobId: string,
   jobPriority: number,
+  directToBullMQ: boolean = false,
 ) {
   const hasCrawlDelay = webScraperOptions.crawl_id && webScraperOptions.crawlerOptions?.delay;
@@ -127,7 +128,7 @@ async function addScrapeJobRaw(
   const concurrencyQueueJobs = await getConcurrencyQueueJobsCount(webScraperOptions.team_id);
-  if (concurrencyLimited) {
+  if (concurrencyLimited && !directToBullMQ) {
     // Detect if they hit their concurrent limit
     // If above by 2x, send them an email
     // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
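
The single-condition change above carries all of the behavior: when directToBullMQ is set, the limiter branch is skipped even if the team is at its cap. A minimal model of that decision, with made-up target labels standing in for the two enqueue paths:

```ts
type EnqueueTarget = "concurrency-queue" | "bullmq"; // assumed labels

// Mirrors the branch in addScrapeJobRaw: jobs wait behind the limiter only
// when the team is over its concurrency cap AND no bypass was requested.
function pickTarget(concurrencyLimited: boolean, directToBullMQ: boolean): EnqueueTarget {
  return concurrencyLimited && !directToBullMQ ? "concurrency-queue" : "bullmq";
}

console.log(pickTarget(true, false));  // "concurrency-queue": pre-commit behavior
console.log(pickTarget(true, true));   // "bullmq": search scrapes now skip the cap
console.log(pickTarget(false, false)); // "bullmq": under the cap, unchanged
```

The trade-off is that bypassed search jobs no longer wait on the team's concurrency cap, which is exactly what the commit title promises.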
@@ -161,6 +162,7 @@ export async function addScrapeJob(
   options: any = {},
   jobId: string = uuidv4(),
   jobPriority: number = 10,
+  directToBullMQ: boolean = false,
 ) {
   if (Sentry.isInitialized()) {
     const size = JSON.stringify(webScraperOptions).length;
@@ -187,6 +189,7 @@ export async function addScrapeJob(
         options,
         jobId,
         jobPriority,
+        directToBullMQ,
       );
     },
   );
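
Backward compatibility falls out of the defaults: every layer declares directToBullMQ: boolean = false, so existing callers are untouched and only the updated search path opts in. A condensed sketch of the pass-through (bodies elided; signatures follow the hunks above, with the uuid swap noted inline):

```ts
async function addScrapeJobRaw(
  webScraperOptions: any,
  options: any,
  jobId: string,
  jobPriority: number,
  directToBullMQ: boolean = false,
): Promise<void> {
  // body elided; the real function enqueues to the concurrency queue or
  // straight to BullMQ, as modeled after the second hunk above
}

async function addScrapeJob(
  webScraperOptions: any,
  options: any = {},
  jobId: string = crypto.randomUUID(), // the real code uses uuidv4()
  jobPriority: number = 10,
  directToBullMQ: boolean = false,
): Promise<void> {
  await addScrapeJobRaw(webScraperOptions, options, jobId, jobPriority, directToBullMQ);
}
```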