From 492d97e88903d39b9c91d8697b1efae8517ab0c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Sat, 24 May 2025 00:09:05 +0200
Subject: [PATCH] reduce logging

---
 apps/api/src/lib/crawl-redis.ts            | 24 +++++++++---------
 apps/api/src/lib/gcs-jobs.ts               |  6 ++---
 .../src/scraper/scrapeURL/engines/index.ts | 25 ++++----------------
 apps/api/src/services/logging/log_job.ts   | 18 ++++++-------
 apps/api/src/services/queue-jobs.ts        |  8 +++---
 5 files changed, 33 insertions(+), 48 deletions(-)

diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts
index caa95429..96de520d 100644
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@@ -163,15 +163,15 @@ export async function finishCrawlPre(id: string) {
     await redisConnection.expire("crawl:" + id + ":finished_pre", 24 * 60 * 60);
     return set === 1;
   } else {
-    _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
-      module: "crawl-redis",
-      method: "finishCrawlPre",
-      crawlId: id,
-      jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
-      jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
-      kickoff_finished:
-        (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
-    });
+    // _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
+    //   module: "crawl-redis",
+    //   method: "finishCrawlPre",
+    //   crawlId: id,
+    //   jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
+    //   jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
+    //   kickoff_finished:
+    //     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
+    // });
   }
 }
 
@@ -279,9 +279,9 @@ export async function lockURL(
       (await redisConnection.scard("crawl:" + id + ":visited_unique")) >=
         sc.crawlerOptions.limit
     ) {
-      logger.debug(
-        "Crawl has already hit visited_unique limit, not locking URL.",
-      );
+      // logger.debug(
+      //   "Crawl has already hit visited_unique limit, not locking URL.",
+      // );
       return false;
     }
   }
diff --git a/apps/api/src/lib/gcs-jobs.ts b/apps/api/src/lib/gcs-jobs.ts
index 278e6e19..3e780457 100644
--- a/apps/api/src/lib/gcs-jobs.ts
+++ b/apps/api/src/lib/gcs-jobs.ts
@@ -105,9 +105,9 @@ export async function getJobFromGCS(jobId: string): Promise<Document | null> {
 
 // TODO: fix the any type (we have multiple Document types in the codebase)
 export async function getDocFromGCS(url: string): Promise<any | null> {
-  logger.info(`Getting f-engine document from GCS`, {
-    url,
-  });
+  // logger.info(`Getting f-engine document from GCS`, {
+  //   url,
+  // });
   try {
     if (!process.env.GCS_FIRE_ENGINE_BUCKET_NAME) {
       return null;
diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts
index b59313e3..06c5e072 100644
--- a/apps/api/src/scraper/scrapeURL/engines/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/index.ts
@@ -383,9 +383,8 @@ export function buildFallbackList(meta: Meta): {
     if (cacheIndex !== -1) {
       _engines.splice(cacheIndex, 1);
     }
-  } else {
-    meta.logger.debug("Cache engine enabled by useCache option");
   }
+
   const prioritySum = [...meta.featureFlags].reduce(
     (a, x) => a + featureFlagOptions[x].priority,
     0,
@@ -424,24 +423,6 @@ export function buildFallbackList(meta: Meta): {
 
     if (supportScore >= priorityThreshold) {
       selectedEngines.push({ engine, supportScore, unsupportedFeatures });
-      meta.logger.debug(`Engine ${engine} meets feature priority threshold`, {
-        supportScore,
-        prioritySum,
-        priorityThreshold,
-        featureFlags: [...meta.featureFlags],
-        unsupportedFeatures,
-      });
-    } else {
-      meta.logger.debug(
-        `Engine ${engine} does not meet feature priority threshold`,
-        {
-          supportScore,
-          prioritySum,
-          priorityThreshold,
-          featureFlags: [...meta.featureFlags],
-          unsupportedFeatures,
-        },
-      );
     }
   }
 
@@ -459,6 +440,10 @@ export function buildFallbackList(meta: Meta): {
     );
   }
 
+  meta.logger.info("Selected engines", {
+    selectedEngines,
+  });
+
   return selectedEngines;
 }
 
diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts
index a1e0a8d5..e73afeb5 100644
--- a/apps/api/src/services/logging/log_job.ts
+++ b/apps/api/src/services/logging/log_job.ts
@@ -47,18 +47,18 @@ async function indexJob(job: FirecrawlJob): Promise<void> {
 
     if (!response.ok) {
       const errorData = await response.json();
-      logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
-        error: errorData,
-        scrapeId: job.job_id,
-      });
+      // logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
+      //   error: errorData,
+      //   scrapeId: job.job_id,
+      // });
     } else {
-      logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
+      // logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
     }
   } catch (error) {
-    logger.error(`Error sending job to external server: ${error.message}`, {
-      error,
-      scrapeId: job.job_id,
-    });
+    // logger.error(`Error sending job to external server: ${error.message}`, {
+    //   error,
+    //   scrapeId: job.job_id,
+    // });
   }
 }
 
diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts
index 2a08de6c..3456c82c 100644
--- a/apps/api/src/services/queue-jobs.ts
+++ b/apps/api/src/services/queue-jobs.ts
@@ -132,13 +132,13 @@ async function addScrapeJobRaw(
   // If above by 2x, send them an email
   // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
   if(concurrencyQueueJobs > maxConcurrency) {
-    logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
+    // logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
 
     // Only send notification if it's not a crawl or batch scrape
     const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
     if (shouldSendNotification) {
       sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-        logger.error("Error sending notification (concurrency limit reached): ", error);
+        logger.error("Error sending notification (concurrency limit reached)", { error });
       });
     }
   }
@@ -231,13 +231,13 @@ export async function addScrapeJobs(
 
   // equals 2x the max concurrency
   if(addToCQ.length > maxConcurrency) {
-    logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
+    // logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
     // Only send notification if it's not a crawl or batch scrape
     if (!isCrawlOrBatchScrape(dontAddToCCQ[0].data)) {
       const shouldSendNotification = await shouldSendConcurrencyLimitNotification(dontAddToCCQ[0].data.team_id);
       if (shouldSendNotification) {
         sendNotificationWithCustomDays(dontAddToCCQ[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-          logger.error("Error sending notification (concurrency limit reached): ", error);
+          logger.error("Error sending notification (concurrency limit reached)", { error });
         });
       }
     }
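
Note on the two `logger.error` call sites above that were rewritten rather than commented out: the old calls passed the caught error as an extra positional argument, console-style. With a winston-style `logger.error(message, metadata)` signature, that trailing argument is not spliced into the message, and what survives into the log record depends on the configured formats. The rewritten calls pass a single `{ error }` metadata object instead, the same structured shape the other calls touched by this patch already use, including the new `meta.logger.info("Selected engines", { selectedEngines })` summary that replaces the per-engine debug lines. A minimal sketch of the difference, assuming a plain winston logger; the service's real shared logger and its format chain are not part of this diff, so the config below is illustrative only:

import { createLogger, format, transports } from "winston";

// Stand-in for the shared service logger; JSON output makes the
// difference between the two call shapes visible on the console.
const logger = createLogger({
  level: "info",
  format: format.combine(format.timestamp(), format.json()),
  transports: [new transports.Console()],
});

const error = new Error("connect ECONNREFUSED");

// Old shape: trailing positional argument. Winston does not printf-splice
// it into the message (that would need format.splat() and a %s token), so
// how much of the Error survives depends entirely on the format chain.
logger.error("Error sending notification (concurrency limit reached): ", error);

// New shape from this patch: one message plus one metadata object. The
// error travels as a named field; fully serializing Error instances still
// needs something like format.errors({ stack: true }) in the format chain.
logger.error("Error sending notification (concurrency limit reached)", { error });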