reduce logging

Gergő Móricz 2025-05-24 00:09:05 +02:00
parent a3145ccacc
commit 492d97e889
5 changed files with 33 additions and 48 deletions

View File

@@ -163,15 +163,15 @@ export async function finishCrawlPre(id: string) {
     await redisConnection.expire("crawl:" + id + ":finished_pre", 24 * 60 * 60);
     return set === 1;
   } else {
-    _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
-      module: "crawl-redis",
-      method: "finishCrawlPre",
-      crawlId: id,
-      jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
-      jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
-      kickoff_finished:
-        (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
-    });
+    // _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
+    //   module: "crawl-redis",
+    //   method: "finishCrawlPre",
+    //   crawlId: id,
+    //   jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
+    //   jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
+    //   kickoff_finished:
+    //     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
+    // });
   }
 }
@@ -279,9 +279,9 @@ export async function lockURL(
       (await redisConnection.scard("crawl:" + id + ":visited_unique")) >=
         sc.crawlerOptions.limit
     ) {
-      logger.debug(
-        "Crawl has already hit visited_unique limit, not locking URL.",
-      );
+      // logger.debug(
+      //   "Crawl has already hit visited_unique limit, not locking URL.",
+      // );
       return false;
     }
   }
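
A side note on the first hunk above: the `_logger.debug` call being commented out awaited two `scard`s and a `get` against Redis purely to build its log payload, so silencing it also drops three Redis round trips per invocation. If the log were ever revived, a common alternative is to gate the expensive metadata behind a level check instead of deleting it. A minimal sketch of that pattern, assuming an ioredis client and a hypothetical `isDebugEnabled()` helper (neither is part of this commit):

```ts
import { Redis } from "ioredis";

const redis = new Redis(process.env.REDIS_URL ?? "redis://localhost:6379");

// Hypothetical level check; the codebase's actual logger configuration may differ.
const isDebugEnabled = (): boolean => process.env.LOG_LEVEL === "debug";

async function logCrawlNotPreFinished(id: string): Promise<void> {
  // Only pay for the Redis reads when debug logging is actually enabled.
  if (!isDebugEnabled()) return;

  const [jobsDone, jobs, kickoffFinish] = await Promise.all([
    redis.scard("crawl:" + id + ":jobs_done"),
    redis.scard("crawl:" + id + ":jobs"),
    redis.get("crawl:" + id + ":kickoff:finish"),
  ]);

  console.debug("Crawl can not be pre-finished yet, not marking as finished.", {
    crawlId: id,
    jobs_done: jobsDone,
    jobs,
    kickoff_finished: kickoffFinish !== null,
  });
}
```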

View File

@@ -105,9 +105,9 @@ export async function getJobFromGCS(jobId: string): Promise<Document[] | null> {
 // TODO: fix the any type (we have multiple Document types in the codebase)
 export async function getDocFromGCS(url: string): Promise<any | null> {
-  logger.info(`Getting f-engine document from GCS`, {
-    url,
-  });
+  // logger.info(`Getting f-engine document from GCS`, {
+  //   url,
+  // });
   try {
     if (!process.env.GCS_FIRE_ENGINE_BUCKET_NAME) {
       return null;

View File

@@ -383,9 +383,8 @@ export function buildFallbackList(meta: Meta): {
     if (cacheIndex !== -1) {
       _engines.splice(cacheIndex, 1);
     }
-  } else {
-    meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
     (a, x) => a + featureFlagOptions[x].priority,
     0,
@@ -424,24 +423,6 @@ export function buildFallbackList(meta: Meta): {
     if (supportScore >= priorityThreshold) {
       selectedEngines.push({ engine, supportScore, unsupportedFeatures });
-      meta.logger.debug(`Engine ${engine} meets feature priority threshold`, {
-        supportScore,
-        prioritySum,
-        priorityThreshold,
-        featureFlags: [...meta.featureFlags],
-        unsupportedFeatures,
-      });
-    } else {
-      meta.logger.debug(
-        `Engine ${engine} does not meet feature priority threshold`,
-        {
-          supportScore,
-          prioritySum,
-          priorityThreshold,
-          featureFlags: [...meta.featureFlags],
-          unsupportedFeatures,
-        },
-      );
     }
   }
@@ -459,6 +440,10 @@ export function buildFallbackList(meta: Meta): {
     );
   }
+  meta.logger.info("Selected engines", {
+    selectedEngines,
+  });
+
   return selectedEngines;
 }
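
Taken together, the three hunks above trade per-iteration debug lines for a single summary: each engine's "meets / does not meet feature priority threshold" message disappears, and one `Selected engines` info log is emitted after the loop. A condensed sketch of that log-once-after-the-loop shape, with simplified stand-in types (the real `Engine`, `Meta`, and scoring logic live elsewhere in the codebase):

```ts
interface ScoredEngine {
  engine: string;
  supportScore: number;
  unsupportedFeatures: Set<string>;
}

function selectEngines(
  candidates: ScoredEngine[],
  priorityThreshold: number,
): ScoredEngine[] {
  const selected: ScoredEngine[] = [];
  for (const candidate of candidates) {
    // No per-engine log line; a rejected engine is simply skipped.
    if (candidate.supportScore >= priorityThreshold) {
      selected.push(candidate);
    }
  }
  // One summary line instead of one line per candidate.
  console.info("Selected engines", { selected });
  return selected;
}
```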

View File

@@ -47,18 +47,18 @@ async function indexJob(job: FirecrawlJob): Promise<void> {
     if (!response.ok) {
       const errorData = await response.json();
-      logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
-        error: errorData,
-        scrapeId: job.job_id,
-      });
+      // logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
+      //   error: errorData,
+      //   scrapeId: job.job_id,
+      // });
     } else {
-      logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
+      // logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
     }
   } catch (error) {
-    logger.error(`Error sending job to external server: ${error.message}`, {
-      error,
-      scrapeId: job.job_id,
-    });
+    // logger.error(`Error sending job to external server: ${error.message}`, {
+    //   error,
+    //   scrapeId: job.job_id,
+    // });
   }
 }

View File

@@ -132,13 +132,13 @@ async function addScrapeJobRaw(
   // If above by 2x, send them an email
   // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
   if(concurrencyQueueJobs > maxConcurrency) {
-    logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
+    // logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
     // Only send notification if it's not a crawl or batch scrape
     const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
     if (shouldSendNotification) {
       sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-        logger.error("Error sending notification (concurrency limit reached): ", error);
+        logger.error("Error sending notification (concurrency limit reached)", { error });
       });
     }
   }
@@ -231,13 +231,13 @@ export async function addScrapeJobs(
   // equals 2x the max concurrency
   if(addToCQ.length > maxConcurrency) {
-    logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
+    // logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
     // Only send notification if it's not a crawl or batch scrape
     if (!isCrawlOrBatchScrape(dontAddToCCQ[0].data)) {
       const shouldSendNotification = await shouldSendConcurrencyLimitNotification(dontAddToCCQ[0].data.team_id);
       if (shouldSendNotification) {
         sendNotificationWithCustomDays(dontAddToCCQ[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-          logger.error("Error sending notification (concurrency limit reached): ", error);
+          logger.error("Error sending notification (concurrency limit reached)", { error });
         });
       }
     }
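
Beyond the comment-outs, the one behavioral change in this file is the `logger.error` call inside the notification `.catch` of both hunks: the old form appended the error as a second positional argument after a trailing-colon message, while the new form passes it as a named key in a metadata object. With a Winston-style `logger.error(message, meta)` signature (assumed here; this diff does not show the logger's configuration), the structured form serializes predictably and keeps the message text stable for searching. A minimal before/after sketch:

```ts
import winston from "winston";

const logger = winston.createLogger({
  level: "info",
  format: winston.format.json(),
  transports: [new winston.transports.Console()],
});

const error = new Error("notification service timeout"); // stand-in error

// Before: the Error is passed positionally as the metadata argument, so its
// message and stack may serialize inconsistently depending on the format.
logger.error("Error sending notification (concurrency limit reached): ", error);

// After: an explicit metadata object with a named `error` key, which log
// pipelines can index, filter, and render consistently.
logger.error("Error sending notification (concurrency limit reached)", { error });
```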