mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 23:49:02 +08:00
feat(queue-jobs): update notification logic for concurrency limits and add parameter (jsdocs) to batchScrapeUrls
This commit is contained in:
parent
e0a3c54967
commit
58e587d99e
@ -1,6 +1,6 @@
|
|||||||
import { getScrapeQueue } from "./queue-service";
|
import { getScrapeQueue } from "./queue-service";
|
||||||
import { v4 as uuidv4 } from "uuid";
|
import { v4 as uuidv4 } from "uuid";
|
||||||
import { PlanType, WebScraperOptions } from "../types";
|
import { NotificationType, PlanType, WebScraperOptions } from "../types";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
import {
|
import {
|
||||||
cleanOldConcurrencyLimitEntries,
|
cleanOldConcurrencyLimitEntries,
|
||||||
@ -11,6 +11,7 @@ import {
|
|||||||
} from "../lib/concurrency-limit";
|
} from "../lib/concurrency-limit";
|
||||||
import { logger } from "../lib/logger";
|
import { logger } from "../lib/logger";
|
||||||
import { getConcurrencyLimitMax } from "./rate-limiter";
|
import { getConcurrencyLimitMax } from "./rate-limiter";
|
||||||
|
import { sendNotificationWithCustomDays } from './notification/email_notification';
|
||||||
|
|
||||||
async function _addScrapeJobToConcurrencyQueue(
|
async function _addScrapeJobToConcurrencyQueue(
|
||||||
webScraperOptions: any,
|
webScraperOptions: any,
|
||||||
@ -80,9 +81,9 @@ async function addScrapeJobRaw(
|
|||||||
// No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
|
// No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
|
||||||
if(concurrencyQueueJobs > maxConcurrency) {
|
if(concurrencyQueueJobs > maxConcurrency) {
|
||||||
logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
|
logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
|
||||||
// sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 10, false).catch((error) => {
|
sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
|
||||||
// logger.error("Error sending notification (concurrency limit reached): ", error);
|
logger.error("Error sending notification (concurrency limit reached): ", error);
|
||||||
// });
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
webScraperOptions.concurrencyLimited = true;
|
webScraperOptions.concurrencyLimited = true;
|
||||||
@ -171,9 +172,9 @@ export async function addScrapeJobs(
|
|||||||
// equals 2x the max concurrency
|
// equals 2x the max concurrency
|
||||||
if(addToCQ.length > maxConcurrency) {
|
if(addToCQ.length > maxConcurrency) {
|
||||||
logger.info("Concurrency limited 2x (multiple) - ", "Concurrency queue jobs: ", addToCQ.length, "Max concurrency: ", maxConcurrency, "Team ID: ", jobs[0].data.team_id);
|
logger.info("Concurrency limited 2x (multiple) - ", "Concurrency queue jobs: ", addToCQ.length, "Max concurrency: ", maxConcurrency, "Team ID: ", jobs[0].data.team_id);
|
||||||
// sendNotificationWithCustomDays(jobs[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 10, false).catch((error) => {
|
sendNotificationWithCustomDays(jobs[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
|
||||||
// logger.error("Error sending notification (concurrency limit reached): ", error);
|
logger.error("Error sending notification (concurrency limit reached): ", error);
|
||||||
// });
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
|
@ -922,6 +922,7 @@ export default class FirecrawlApp {
|
|||||||
* @param pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @param webhook - Optional webhook for the batch scrape.
|
* @param webhook - Optional webhook for the batch scrape.
|
||||||
|
* @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
|
||||||
* @returns The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
async batchScrapeUrls(
|
async batchScrapeUrls(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user