chore: formatting
parent b9f621bed5
commit 3b6edef9fa
@@ -60,7 +60,11 @@ export async function scrapeController(
   try {
     doc = await waitForJob<Document>(jobId, timeout + totalWait); // TODO: better types for this
   } catch (e) {
-    logger.error(`Error in scrapeController: ${e}`, { jobId, scrapeId: jobId, startTime });
+    logger.error(`Error in scrapeController: ${e}`, {
+      jobId,
+      scrapeId: jobId,
+      startTime,
+    });
     if (
       e instanceof Error &&
       (e.message.startsWith("Job wait") || e.message === "timeout")
@@ -94,9 +94,13 @@ export async function addCrawlJobDone(
     await redisConnection.rpush("crawl:" + id + ":jobs_done_ordered", job_id);
   } else {
     // in case it's already been pushed, make sure it's removed
-    await redisConnection.lrem("crawl:" + id + ":jobs_done_ordered", -1, job_id);
+    await redisConnection.lrem(
+      "crawl:" + id + ":jobs_done_ordered",
+      -1,
+      job_id,
+    );
   }

   await redisConnection.expire(
     "crawl:" + id + ":jobs_done_ordered",
     24 * 60 * 60,
@@ -122,4 +122,3 @@
 // },
 // };
 // }
-
@@ -5,7 +5,7 @@ import { specialtyScrapeCheck } from "../utils/specialtyHandler";

 export async function scrapeURLWithFetch(
   meta: Meta,
-  timeToRun: number | undefined
+  timeToRun: number | undefined,
 ): Promise<EngineScrapeResult> {
   const timeout = timeToRun ?? 300000;

@@ -128,7 +128,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
     (a, x) => (x.type === "wait" ? (x.milliseconds ?? 1000) + a : a),
     0,
   );

   const timeout = (timeToRun ?? 300000) + totalWait;

   const request: FireEngineScrapeRequestCommon &
@@ -105,7 +105,10 @@ export type EngineScrapeResult = {
 };

 const engineHandlers: {
-  [E in Engine]: (meta: Meta, timeToRun: number | undefined) => Promise<EngineScrapeResult>;
+  [E in Engine]: (
+    meta: Meta,
+    timeToRun: number | undefined,
+  ) => Promise<EngineScrapeResult>;
 } = {
   cache: scrapeCache,
   "fire-engine;chrome-cdp": scrapeURLWithFireEngineChromeCDP,
@@ -372,7 +375,7 @@ export function buildFallbackList(meta: Meta): {
 export async function scrapeURLWithEngine(
   meta: Meta,
   engine: Engine,
-  timeToRun: number | undefined
+  timeToRun: number | undefined,
 ): Promise<EngineScrapeResult> {
   const fn = engineHandlers[engine];
   const logger = meta.logger.child({
@@ -124,7 +124,10 @@ async function scrapePDFWithParsePDF(
   };
 }

-export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> {
+export async function scrapePDF(
+  meta: Meta,
+  timeToRun: number | undefined,
+): Promise<EngineScrapeResult> {
   if (!meta.options.parsePDF) {
     const file = await fetchFileToBuffer(meta.url);
     const content = file.buffer.toString("base64");
@@ -152,9 +155,12 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> {
     tempFilePath,
   );

-
   // If the parsed text is under 500 characters and LLAMAPARSE_API_KEY exists, try LlamaParse
-  if (result.markdown && result.markdown.length < 500 && process.env.LLAMAPARSE_API_KEY) {
+  if (
+    result.markdown &&
+    result.markdown.length < 500 &&
+    process.env.LLAMAPARSE_API_KEY
+  ) {
     try {
       const llamaResult = await scrapePDFWithLlamaParse(
         {
@@ -193,4 +199,4 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> {
     html: result.html,
     markdown: result.markdown,
   };
 }
@@ -10,7 +10,10 @@ const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
 export function scrapeURLWithScrapingBee(
   wait_browser: "domcontentloaded" | "networkidle2",
 ): (meta: Meta, timeToRun: number | undefined) => Promise<EngineScrapeResult> {
-  return async (meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> => {
+  return async (
+    meta: Meta,
+    timeToRun: number | undefined,
+  ): Promise<EngineScrapeResult> => {
     let response: AxiosResponse<any>;
     const timeout = (timeToRun ?? 300000) + meta.options.waitFor;
     try {
@@ -60,9 +60,7 @@ export class SiteError extends Error {
 export class ActionError extends Error {
   public code: string;
   constructor(code: string) {
-    super(
-      "Action(s) failed to complete. Error code: " + code,
-    );
+    super("Action(s) failed to complete. Error code: " + code);
     this.code = code;
   }
 }
@@ -203,9 +203,10 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
   const results: EngineResultsTracker = {};
   let result: EngineScrapeResultWithContext | null = null;

-  const timeToRun = meta.options.timeout !== undefined
-    ? Math.round(meta.options.timeout / Math.min(fallbackList.length, 2))
-    : undefined
+  const timeToRun =
+    meta.options.timeout !== undefined
+      ? Math.round(meta.options.timeout / Math.min(fallbackList.length, 2))
+      : undefined;

   for (const { engine, unsupportedFeatures } of fallbackList) {
     const startedAt = Date.now();
@@ -72,7 +72,12 @@ async function addScrapeJobRaw(
   }

   if (concurrencyLimited) {
-    await _addScrapeJobToConcurrencyQueue(webScraperOptions, options, jobId, jobPriority);
+    await _addScrapeJobToConcurrencyQueue(
+      webScraperOptions,
+      options,
+      jobId,
+      jobPriority,
+    );
   } else {
     await _addScrapeJobToBullMQ(webScraperOptions, options, jobId, jobPriority);
   }
@@ -130,17 +135,17 @@ export async function addScrapeJobs(

   let countCanBeDirectlyAdded = Infinity;

-  if (
-    jobs[0].data &&
-    jobs[0].data.team_id &&
-    jobs[0].data.plan
-  ) {
+  if (jobs[0].data && jobs[0].data.team_id && jobs[0].data.plan) {
     const now = Date.now();
     const limit = await getConcurrencyLimitMax(jobs[0].data.plan);
     console.log("CC limit", limit);
     cleanOldConcurrencyLimitEntries(jobs[0].data.team_id, now);

-    countCanBeDirectlyAdded = Math.max(limit - (await getConcurrencyLimitActiveJobs(jobs[0].data.team_id, now)).length, 0);
+    countCanBeDirectlyAdded = Math.max(
+      limit -
+        (await getConcurrencyLimitActiveJobs(jobs[0].data.team_id, now)).length,
+      0,
+    );
   }

   const addToBull = jobs.slice(0, countCanBeDirectlyAdded);
@@ -496,15 +496,14 @@ async function processJob(job: Job & { id: string }, token: string) {
         // See lockURL
         const x = await redisConnection.sadd(
           "crawl:" + job.data.crawl_id + ":visited",
-          ...p1.map(x => x.href),
+          ...p1.map((x) => x.href),
         );
         const lockRes = x === p1.length;

         if (job.data.crawlerOptions !== null && !lockRes) {
           throw new RacedRedirectError();
         }
       }
-
     }

     logger.debug("Logging job to DB...");
@@ -675,7 +674,10 @@ async function processJob(job: Job & { id: string }, token: string) {

     logger.debug("Declaring job as done...");
     await addCrawlJobDone(job.data.crawl_id, job.id, false);
-    await redisConnection.srem("crawl:" + job.data.crawl_id + ":visited_unique", normalizeURL(job.data.url, sc));
+    await redisConnection.srem(
+      "crawl:" + job.data.crawl_id + ":visited_unique",
+      normalizeURL(job.data.url, sc),
+    );

     logger.debug("Logging job to DB...");
     await logJob(
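
The pattern across these hunks (arguments wrapped one per line with trailing commas, parenthesized single arrow parameters, roughly 80-column lines, extra blank lines collapsed) is what a standard Prettier run produces. A minimal sketch of a config that yields this output, assuming Prettier 3; the file name and option values below are illustrative and not taken from this repository:

// prettier.config.js (hypothetical; these also happen to be Prettier 3 defaults)
export default {
  printWidth: 80,        // wrap lines longer than 80 columns, as in the split call arguments above
  trailingComma: "all",  // add trailing commas to multi-line argument and parameter lists
  arrowParens: "always", // write (x) => x.href instead of x => x.href
  semi: true,            // keep statement-terminating semicolons
};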