mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 02:18:59 +08:00

commit 492d97e889 (parent a3145ccacc)

reduce logging
@@ -163,15 +163,15 @@ export async function finishCrawlPre(id: string) {
     await redisConnection.expire("crawl:" + id + ":finished_pre", 24 * 60 * 60);
     return set === 1;
   } else {
-    _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
-      module: "crawl-redis",
-      method: "finishCrawlPre",
-      crawlId: id,
-      jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
-      jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
-      kickoff_finished:
-        (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
-    });
+    // _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
+    //   module: "crawl-redis",
+    //   method: "finishCrawlPre",
+    //   crawlId: id,
+    //   jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
+    //   jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
+    //   kickoff_finished:
+    //     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
+    // });
   }
 }
 
@@ -279,9 +279,9 @@ export async function lockURL(
     (await redisConnection.scard("crawl:" + id + ":visited_unique")) >=
       sc.crawlerOptions.limit
   ) {
-    logger.debug(
-      "Crawl has already hit visited_unique limit, not locking URL.",
-    );
+    // logger.debug(
+    //   "Crawl has already hit visited_unique limit, not locking URL.",
+    // );
     return false;
   }
 }
@@ -105,9 +105,9 @@ export async function getJobFromGCS(jobId: string): Promise<Document[] | null> {
 
 // TODO: fix the any type (we have multiple Document types in the codebase)
 export async function getDocFromGCS(url: string): Promise<any | null> {
-  logger.info(`Getting f-engine document from GCS`, {
-    url,
-  });
+  // logger.info(`Getting f-engine document from GCS`, {
+  //   url,
+  // });
   try {
     if (!process.env.GCS_FIRE_ENGINE_BUCKET_NAME) {
       return null;
@@ -383,9 +383,8 @@ export function buildFallbackList(meta: Meta): {
     if (cacheIndex !== -1) {
       _engines.splice(cacheIndex, 1);
     }
   } else {
-    meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
     (a, x) => a + featureFlagOptions[x].priority,
     0,
@@ -424,24 +423,6 @@ export function buildFallbackList(meta: Meta): {
 
     if (supportScore >= priorityThreshold) {
       selectedEngines.push({ engine, supportScore, unsupportedFeatures });
-      meta.logger.debug(`Engine ${engine} meets feature priority threshold`, {
-        supportScore,
-        prioritySum,
-        priorityThreshold,
-        featureFlags: [...meta.featureFlags],
-        unsupportedFeatures,
-      });
-    } else {
-      meta.logger.debug(
-        `Engine ${engine} does not meet feature priority threshold`,
-        {
-          supportScore,
-          prioritySum,
-          priorityThreshold,
-          featureFlags: [...meta.featureFlags],
-          unsupportedFeatures,
-        },
-      );
     }
   }
 
@@ -459,6 +440,10 @@ export function buildFallbackList(meta: Meta): {
     );
   }
 
+  meta.logger.info("Selected engines", {
+    selectedEngines,
+  });
+
   return selectedEngines;
 }
 
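The two hunks above trade per-engine debug entries inside the selection loop for a single summary entry after it. A minimal sketch of that pattern with invented stand-in data (not the repository's actual types or engine names):

    // Sketch only: stand-in data and console logging, not firecrawl's code.
    const selectedEngines = [
      { engine: "fire-engine", supportScore: 3 },
      { engine: "fetch", supportScore: 1 },
    ];

    // Removed: one debug entry per candidate engine inside the loop, e.g.
    //   meta.logger.debug(`Engine ${engine} meets feature priority threshold`, { supportScore, ... });

    // Added: a single entry after the loop, carrying the whole selection
    // as one structured field.
    console.info("Selected engines", { selectedEngines });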
@@ -47,18 +47,18 @@ async function indexJob(job: FirecrawlJob): Promise<void> {
 
     if (!response.ok) {
       const errorData = await response.json();
-      logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
-        error: errorData,
-        scrapeId: job.job_id,
-      });
+      // logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
+      //   error: errorData,
+      //   scrapeId: job.job_id,
+      // });
     } else {
-      logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
+      // logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
     }
   } catch (error) {
-    logger.error(`Error sending job to external server: ${error.message}`, {
-      error,
-      scrapeId: job.job_id,
-    });
+    // logger.error(`Error sending job to external server: ${error.message}`, {
+    //   error,
+    //   scrapeId: job.job_id,
+    // });
   }
 }
 
@@ -132,13 +132,13 @@ async function addScrapeJobRaw(
   // If above by 2x, send them an email
   // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
   if(concurrencyQueueJobs > maxConcurrency) {
-    logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
+    // logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
 
     // Only send notification if it's not a crawl or batch scrape
     const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
     if (shouldSendNotification) {
       sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-        logger.error("Error sending notification (concurrency limit reached): ", error);
+        logger.error("Error sending notification (concurrency limit reached)", { error });
       });
     }
   }
@@ -231,13 +231,13 @@ export async function addScrapeJobs(
 
   // equals 2x the max concurrency
   if(addToCQ.length > maxConcurrency) {
-    logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
+    // logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
     // Only send notification if it's not a crawl or batch scrape
     if (!isCrawlOrBatchScrape(dontAddToCCQ[0].data)) {
      const shouldSendNotification = await shouldSendConcurrencyLimitNotification(dontAddToCCQ[0].data.team_id);
       if (shouldSendNotification) {
         sendNotificationWithCustomDays(dontAddToCCQ[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-          logger.error("Error sending notification (concurrency limit reached): ", error);
+          logger.error("Error sending notification (concurrency limit reached)", { error });
         });
       }
     }
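Where error logs survive, the commit also moves interpolated values out of the message string and into a metadata object. A sketch of the before/after, assuming a winston/pino-style (message, meta) logger; the Logger interface below is illustrative, not the repository's actual wrapper:

    // Illustrative (message, meta) logger; firecrawl's real wrapper may differ.
    interface Logger {
      error(message: string, meta?: Record<string, unknown>): void;
    }
    const logger: Logger = {
      error: (message, meta) => console.error(message, meta ?? {}),
    };

    const error = new Error("connect ECONNREFUSED");

    // Before: the error is concatenated into the message (and any extra
    // positional arguments would be dropped by a (message, meta) signature).
    logger.error("Error sending notification (concurrency limit reached): " + error);

    // After: a constant message; the error travels as an indexed field that
    // log aggregators can filter on without parsing the string.
    logger.error("Error sending notification (concurrency limit reached)", { error });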