feat(queue-jobs): update notification logic for concurrency limits and add parameter (jsdocs) to batchScrapeUrls

Ademílson F. Tonato 2025-03-31 13:27:36 +01:00
parent e0a3c54967
commit 58e587d99e
2 changed files with 9 additions and 7 deletions


@@ -1,6 +1,6 @@
 import { getScrapeQueue } from "./queue-service";
 import { v4 as uuidv4 } from "uuid";
-import { PlanType, WebScraperOptions } from "../types";
+import { NotificationType, PlanType, WebScraperOptions } from "../types";
 import * as Sentry from "@sentry/node";
 import {
   cleanOldConcurrencyLimitEntries,
@@ -11,6 +11,7 @@ import {
 } from "../lib/concurrency-limit";
 import { logger } from "../lib/logger";
 import { getConcurrencyLimitMax } from "./rate-limiter";
+import { sendNotificationWithCustomDays } from './notification/email_notification';
 
 async function _addScrapeJobToConcurrencyQueue(
   webScraperOptions: any,
@@ -80,9 +81,9 @@ async function addScrapeJobRaw(
     // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
     if(concurrencyQueueJobs > maxConcurrency) {
       logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
-      // sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 10, false).catch((error) => {
-      //   logger.error("Error sending notification (concurrency limit reached): ", error);
-      // });
+      sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
+        logger.error("Error sending notification (concurrency limit reached): ", error);
+      });
     }
     webScraperOptions.concurrencyLimited = true;
@@ -171,9 +172,9 @@ export async function addScrapeJobs(
     // equals 2x the max concurrency
     if(addToCQ.length > maxConcurrency) {
       logger.info("Concurrency limited 2x (multiple) - ", "Concurrency queue jobs: ", addToCQ.length, "Max concurrency: ", maxConcurrency, "Team ID: ", jobs[0].data.team_id);
-      // sendNotificationWithCustomDays(jobs[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 10, false).catch((error) => {
-      //   logger.error("Error sending notification (concurrency limit reached): ", error);
-      // });
+      sendNotificationWithCustomDays(jobs[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
+        logger.error("Error sending notification (concurrency limit reached): ", error);
+      });
     }
     await Promise.all(
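
In both hunks the notification call, previously commented out, goes live as a fire-and-forget promise: it is not awaited, so a failed email can never block job enqueueing, and the repeat-notification window is raised from 10 to 15 days. A minimal sketch of that pattern, assuming only what the diff shows (the wrapper function is illustrative, and the meaning of the final boolean argument is a guess):

import { sendNotificationWithCustomDays } from "./notification/email_notification";
import { NotificationType } from "../types";
import { logger } from "../lib/logger";

// Fire-and-forget: notify a team that its concurrency limit was reached,
// at most once per 15-day window, without blocking the enqueue path.
function notifyConcurrencyLimited(teamId: string) {
  sendNotificationWithCustomDays(
    teamId,
    NotificationType.CONCURRENCY_LIMIT_REACHED,
    15,    // custom "days" window between repeat notifications
    false, // per the diff; presumably a bypass/force flag
  ).catch((error) => {
    // Log and swallow so a mail failure never fails the scrape job itself.
    logger.error("Error sending notification (concurrency limit reached): ", error);
  });
}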


@@ -922,6 +922,7 @@ export default class FirecrawlApp
    * @param pollInterval - Time in seconds for job status checks.
    * @param idempotencyKey - Optional idempotency key for the request.
    * @param webhook - Optional webhook for the batch scrape.
+   * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
    * @returns The response from the crawl operation.
    */
   async batchScrapeUrls(
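
For context, a hypothetical call from the SDK consumer's side, assuming the positional parameter order implied by the JSDoc above (urls, params, pollInterval, idempotencyKey, webhook, ignoreInvalidURLs) and the published package name @mendable/firecrawl-js:

import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });

// With ignoreInvalidURLs set, malformed entries in the batch are skipped
// instead of failing the whole request (argument order assumed from the JSDoc).
const result = await app.batchScrapeUrls(
  ["https://example.com", "not-a-valid-url"],
  { formats: ["markdown"] }, // scrape params
  2,                         // pollInterval (seconds)
  undefined,                 // idempotencyKey
  undefined,                 // webhook
  true,                      // ignoreInvalidURLs
);

Passing true lets a batch proceed past malformed entries rather than rejecting the whole request, which is what the new @param description implies.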