mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 02:18:59 +08:00

commit 492d97e889 (parent a3145ccacc)

reduce logging
@@ -163,15 +163,15 @@ export async function finishCrawlPre(id: string) {
     await redisConnection.expire("crawl:" + id + ":finished_pre", 24 * 60 * 60);
     return set === 1;
   } else {
-    _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
-      module: "crawl-redis",
-      method: "finishCrawlPre",
-      crawlId: id,
-      jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
-      jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
-      kickoff_finished:
-        (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
-    });
+    // _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
+    //   module: "crawl-redis",
+    //   method: "finishCrawlPre",
+    //   crawlId: id,
+    //   jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
+    //   jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
+    //   kickoff_finished:
+    //     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
+    // });
   }
 }
 
@@ -279,9 +279,9 @@ export async function lockURL(
     (await redisConnection.scard("crawl:" + id + ":visited_unique")) >=
       sc.crawlerOptions.limit
   ) {
-    logger.debug(
-      "Crawl has already hit visited_unique limit, not locking URL.",
-    );
+    // logger.debug(
+    //   "Crawl has already hit visited_unique limit, not locking URL.",
+    // );
     return false;
   }
 }
@@ -105,9 +105,9 @@ export async function getJobFromGCS(jobId: string): Promise<Document[] | null> {
 
 // TODO: fix the any type (we have multiple Document types in the codebase)
 export async function getDocFromGCS(url: string): Promise<any | null> {
-  logger.info(`Getting f-engine document from GCS`, {
-    url,
-  });
+  // logger.info(`Getting f-engine document from GCS`, {
+  //   url,
+  // });
   try {
     if (!process.env.GCS_FIRE_ENGINE_BUCKET_NAME) {
       return null;
@@ -383,9 +383,8 @@ export function buildFallbackList(meta: Meta): {
     if (cacheIndex !== -1) {
       _engines.splice(cacheIndex, 1);
     }
   } else {
-    meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
     (a, x) => a + featureFlagOptions[x].priority,
     0,
@@ -424,24 +423,6 @@ export function buildFallbackList(meta: Meta): {
 
     if (supportScore >= priorityThreshold) {
       selectedEngines.push({ engine, supportScore, unsupportedFeatures });
-      meta.logger.debug(`Engine ${engine} meets feature priority threshold`, {
-        supportScore,
-        prioritySum,
-        priorityThreshold,
-        featureFlags: [...meta.featureFlags],
-        unsupportedFeatures,
-      });
-    } else {
-      meta.logger.debug(
-        `Engine ${engine} does not meet feature priority threshold`,
-        {
-          supportScore,
-          prioritySum,
-          priorityThreshold,
-          featureFlags: [...meta.featureFlags],
-          unsupportedFeatures,
-        },
-      );
     }
   }
 
@@ -459,6 +440,10 @@ export function buildFallbackList(meta: Meta): {
     );
   }
 
+  meta.logger.info("Selected engines", {
+    selectedEngines,
+  });
+
   return selectedEngines;
 }
 
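The two hunks above trade per-engine debug entries inside the selection loop for a single summary entry after it. A minimal sketch of that pattern with invented stand-in data (not the repository's actual types or engine names):

    // Sketch only: stand-in data and console logging, not firecrawl's code.
    const selectedEngines = [
      { engine: "fire-engine", supportScore: 3 },
      { engine: "fetch", supportScore: 1 },
    ];

    // Removed: one debug entry per candidate engine inside the loop, e.g.
    //   meta.logger.debug(`Engine ${engine} meets feature priority threshold`, { supportScore, ... });

    // Added: a single entry after the loop, carrying the whole selection
    // as one structured field.
    console.info("Selected engines", { selectedEngines });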
@@ -47,18 +47,18 @@ async function indexJob(job: FirecrawlJob): Promise<void> {
 
     if (!response.ok) {
       const errorData = await response.json();
-      logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
-        error: errorData,
-        scrapeId: job.job_id,
-      });
+      // logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
+      //   error: errorData,
+      //   scrapeId: job.job_id,
+      // });
     } else {
-      logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
+      // logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
     }
   } catch (error) {
-    logger.error(`Error sending job to external server: ${error.message}`, {
-      error,
-      scrapeId: job.job_id,
-    });
+    // logger.error(`Error sending job to external server: ${error.message}`, {
+    //   error,
+    //   scrapeId: job.job_id,
+    // });
   }
 }
 
@@ -132,13 +132,13 @@ async function addScrapeJobRaw(
   // If above by 2x, send them an email
   // No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
   if(concurrencyQueueJobs > maxConcurrency) {
-    logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
+    // logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
 
     // Only send notification if it's not a crawl or batch scrape
     const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
     if (shouldSendNotification) {
       sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-        logger.error("Error sending notification (concurrency limit reached): ", error);
+        logger.error("Error sending notification (concurrency limit reached)", { error });
       });
     }
   }
@@ -231,13 +231,13 @@ export async function addScrapeJobs(
 
   // equals 2x the max concurrency
   if(addToCQ.length > maxConcurrency) {
-    logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
+    // logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
     // Only send notification if it's not a crawl or batch scrape
     if (!isCrawlOrBatchScrape(dontAddToCCQ[0].data)) {
      const shouldSendNotification = await shouldSendConcurrencyLimitNotification(dontAddToCCQ[0].data.team_id);
       if (shouldSendNotification) {
         sendNotificationWithCustomDays(dontAddToCCQ[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-          logger.error("Error sending notification (concurrency limit reached): ", error);
+          logger.error("Error sending notification (concurrency limit reached)", { error });
         });
       }
     }
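Where error logs survive, the commit also moves interpolated values out of the message string and into a metadata object. A sketch of the before/after, assuming a winston/pino-style (message, meta) logger; the Logger interface below is illustrative, not the repository's actual wrapper:

    // Illustrative (message, meta) logger; firecrawl's real wrapper may differ.
    interface Logger {
      error(message: string, meta?: Record<string, unknown>): void;
    }
    const logger: Logger = {
      error: (message, meta) => console.error(message, meta ?? {}),
    };

    const error = new Error("connect ECONNREFUSED");

    // Before: the error is concatenated into the message (and any extra
    // positional arguments would be dropped by a (message, meta) signature).
    logger.error("Error sending notification (concurrency limit reached): " + error);

    // After: a constant message; the error travels as an indexed field that
    // log aggregators can filter on without parsing the string.
    logger.error("Error sending notification (concurrency limit reached)", { error });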