chore: formatting

Nicolas 2024-12-17 16:58:57 -03:00
parent b9f621bed5
commit 3b6edef9fa
12 changed files with 55 additions and 30 deletions

View File

@@ -60,7 +60,11 @@ export async function scrapeController(
   try {
     doc = await waitForJob<Document>(jobId, timeout + totalWait); // TODO: better types for this
   } catch (e) {
-    logger.error(`Error in scrapeController: ${e}`, { jobId, scrapeId: jobId, startTime });
+    logger.error(`Error in scrapeController: ${e}`, {
+      jobId,
+      scrapeId: jobId,
+      startTime,
+    });
     if (
       e instanceof Error &&
       (e.message.startsWith("Job wait") || e.message === "timeout")
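A side note on the catch branch above: it distinguishes deadline failures by inspecting the error message. A minimal sketch of a wait-with-deadline helper that produces such errors — an illustration only, not the project's actual waitForJob:

    // Race a pending job against a deadline; reject with a recognizable
    // message so callers can branch on timeouts, as the catch above does.
    function waitWithDeadline<T>(job: Promise<T>, ms: number): Promise<T> {
      return new Promise<T>((resolve, reject) => {
        const timer = setTimeout(
          () => reject(new Error("Job wait exceeded " + ms + "ms")),
          ms,
        );
        job.then(
          (value) => {
            clearTimeout(timer);
            resolve(value);
          },
          (err) => {
            clearTimeout(timer);
            reject(err);
          },
        );
      });
    }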

View File

@@ -94,7 +94,11 @@ export async function addCrawlJobDone(
     await redisConnection.rpush("crawl:" + id + ":jobs_done_ordered", job_id);
   } else {
     // in case it's already been pushed, make sure it's removed
-    await redisConnection.lrem("crawl:" + id + ":jobs_done_ordered", -1, job_id);
+    await redisConnection.lrem(
+      "crawl:" + id + ":jobs_done_ordered",
+      -1,
+      job_id,
+    );
   }
   await redisConnection.expire(
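For context on the call being wrapped here: Redis LREM takes a count whose sign sets the scan direction, and -1 removes a single occurrence scanning from the tail. A minimal sketch of the same push-or-withdraw pattern with ioredis (key layout and names are illustrative):

    import Redis from "ioredis";

    const redis = new Redis();

    // Record a finished job in order, or withdraw it if it was pushed earlier.
    async function markJobDone(crawlId: string, jobId: string, success: boolean) {
      const key = "crawl:" + crawlId + ":jobs_done_ordered";
      if (success) {
        await redis.rpush(key, jobId); // append to the tail, preserving finish order
      } else {
        // count = -1: remove one matching element, scanning tail -> head
        await redis.lrem(key, -1, jobId);
      }
    }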

View File

@@ -122,4 +122,3 @@
 // },
 // };
 // }
-

View File

@@ -5,7 +5,7 @@ import { specialtyScrapeCheck } from "../utils/specialtyHandler";
 export async function scrapeURLWithFetch(
   meta: Meta,
-  timeToRun: number | undefined
+  timeToRun: number | undefined,
 ): Promise<EngineScrapeResult> {
   const timeout = timeToRun ?? 300000;

View File

@@ -105,7 +105,10 @@ export type EngineScrapeResult = {
 };
 const engineHandlers: {
-  [E in Engine]: (meta: Meta, timeToRun: number | undefined) => Promise<EngineScrapeResult>;
+  [E in Engine]: (
+    meta: Meta,
+    timeToRun: number | undefined,
+  ) => Promise<EngineScrapeResult>;
 } = {
   cache: scrapeCache,
   "fire-engine;chrome-cdp": scrapeURLWithFireEngineChromeCDP,
@@ -372,7 +375,7 @@ export function buildFallbackList(meta: Meta): {
 export async function scrapeURLWithEngine(
   meta: Meta,
   engine: Engine,
-  timeToRun: number | undefined
+  timeToRun: number | undefined,
 ): Promise<EngineScrapeResult> {
   const fn = engineHandlers[engine];
   const logger = meta.logger.child({
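Worth noting about the first hunk in this file: the handler table uses a mapped type over the Engine union, so the compiler rejects the object unless every engine name has a handler of the right shape. A reduced sketch of the pattern (the union members and handler bodies are illustrative):

    type Engine = "cache" | "fetch" | "pdf"; // stand-in; the real union is larger

    type EngineScrapeResult = { url: string; html: string; statusCode: number };

    // One entry per union member; omitting any key is a compile-time error.
    const handlers: {
      [E in Engine]: (url: string) => Promise<EngineScrapeResult>;
    } = {
      cache: async (url) => ({ url, html: "", statusCode: 200 }),
      fetch: async (url) => ({ url, html: "", statusCode: 200 }),
      pdf: async (url) => ({ url, html: "", statusCode: 200 }),
    };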

View File

@@ -124,7 +124,10 @@ async function scrapePDFWithParsePDF(
   };
 }
-export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> {
+export async function scrapePDF(
+  meta: Meta,
+  timeToRun: number | undefined,
+): Promise<EngineScrapeResult> {
   if (!meta.options.parsePDF) {
     const file = await fetchFileToBuffer(meta.url);
     const content = file.buffer.toString("base64");
@@ -152,9 +155,12 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Prom
     tempFilePath,
   );
   // If the parsed text is under 500 characters and LLAMAPARSE_API_KEY exists, try LlamaParse
-  if (result.markdown && result.markdown.length < 500 && process.env.LLAMAPARSE_API_KEY) {
+  if (
+    result.markdown &&
+    result.markdown.length < 500 &&
+    process.env.LLAMAPARSE_API_KEY
+  ) {
     try {
       const llamaResult = await scrapePDFWithLlamaParse(
         {
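The guard reformatted in the second hunk is a quality fallback: when the local parse yields suspiciously little text and a LlamaParse key is configured, retry with the hosted parser. A minimal sketch of that control flow (parseLocally and parseWithLlama are hypothetical stand-ins for the module's real helpers):

    declare function parseLocally(path: string): Promise<string>; // hypothetical
    declare function parseWithLlama(path: string): Promise<string>; // hypothetical

    async function parsePdf(path: string): Promise<string> {
      let markdown = await parseLocally(path);
      // Under ~500 chars usually signals a scanned or image-heavy PDF.
      if (markdown.length < 500 && process.env.LLAMAPARSE_API_KEY) {
        try {
          markdown = await parseWithLlama(path); // hosted, OCR-capable
        } catch {
          // keep the local result if the fallback fails
        }
      }
      return markdown;
    }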

View File

@@ -10,7 +10,10 @@ const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
 export function scrapeURLWithScrapingBee(
   wait_browser: "domcontentloaded" | "networkidle2",
 ): (meta: Meta, timeToRun: number | undefined) => Promise<EngineScrapeResult> {
-  return async (meta: Meta, timeToRun: number | undefined): Promise<EngineScrapeResult> => {
+  return async (
+    meta: Meta,
+    timeToRun: number | undefined,
+  ): Promise<EngineScrapeResult> => {
     let response: AxiosResponse<any>;
     const timeout = (timeToRun ?? 300000) + meta.options.waitFor;
     try {
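The function above is a factory: it fixes the wait_browser strategy once and returns an engine-shaped closure, letting two ScrapingBee variants share one implementation. A stripped-down sketch of the shape (the types are simplified stand-ins):

    type Scraper = (url: string, timeToRun: number | undefined) => Promise<string>;

    // Fix one configuration value up front; each call yields a ready engine.
    function makeScraper(
      waitBrowser: "domcontentloaded" | "networkidle2",
    ): Scraper {
      return async (url, timeToRun) => {
        const timeout = timeToRun ?? 300000; // same default as the hunk above
        return `fetched ${url} (wait=${waitBrowser}, timeout=${timeout})`;
      };
    }

    const scrapeDom = makeScraper("domcontentloaded");
    const scrapeIdle = makeScraper("networkidle2");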

View File

@@ -60,9 +60,7 @@ export class SiteError extends Error {
 export class ActionError extends Error {
   public code: string;
   constructor(code: string) {
-    super(
-      "Action(s) failed to complete. Error code: " + code,
-    );
+    super("Action(s) failed to complete. Error code: " + code);
     this.code = code;
   }
 }
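ActionError follows the usual pattern of an Error subclass carrying a machine-readable code, so callers can branch on it after an instanceof check (the code value below is illustrative):

    try {
      throw new ActionError("ERR_CLICK_TIMEOUT");
    } catch (e) {
      if (e instanceof ActionError) {
        console.log(e.code); // "ERR_CLICK_TIMEOUT"
      }
    }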

View File

@@ -203,9 +203,10 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
   const results: EngineResultsTracker = {};
   let result: EngineScrapeResultWithContext | null = null;
-  const timeToRun = meta.options.timeout !== undefined
+  const timeToRun =
+    meta.options.timeout !== undefined
       ? Math.round(meta.options.timeout / Math.min(fallbackList.length, 2))
-      : undefined
+      : undefined;
   for (const { engine, unsupportedFeatures } of fallbackList) {
     const startedAt = Date.now();
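The expression being reformatted splits the caller's timeout budget across fallback engines, but never across more than two: with a 30000 ms timeout and three engines, each attempt gets Math.round(30000 / Math.min(3, 2)) = 15000 ms. The same arithmetic as a standalone helper:

    // Split an overall timeout across fallback attempts, capping the divisor
    // at 2 so each engine keeps a meaningful share of the budget.
    function perEngineBudget(
      timeout: number | undefined,
      engineCount: number,
    ): number | undefined {
      return timeout !== undefined
        ? Math.round(timeout / Math.min(engineCount, 2))
        : undefined;
    }

    // perEngineBudget(30000, 3) === 15000; perEngineBudget(30000, 1) === 30000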

View File

@@ -72,7 +72,12 @@ async function addScrapeJobRaw(
   }
   if (concurrencyLimited) {
-    await _addScrapeJobToConcurrencyQueue(webScraperOptions, options, jobId, jobPriority);
+    await _addScrapeJobToConcurrencyQueue(
+      webScraperOptions,
+      options,
+      jobId,
+      jobPriority,
+    );
   } else {
     await _addScrapeJobToBullMQ(webScraperOptions, options, jobId, jobPriority);
   }
@@ -130,17 +135,17 @@ export async function addScrapeJobs(
   let countCanBeDirectlyAdded = Infinity;
-  if (
-    jobs[0].data &&
-    jobs[0].data.team_id &&
-    jobs[0].data.plan
-  ) {
+  if (jobs[0].data && jobs[0].data.team_id && jobs[0].data.plan) {
     const now = Date.now();
     const limit = await getConcurrencyLimitMax(jobs[0].data.plan);
     console.log("CC limit", limit);
     cleanOldConcurrencyLimitEntries(jobs[0].data.team_id, now);
-    countCanBeDirectlyAdded = Math.max(limit - (await getConcurrencyLimitActiveJobs(jobs[0].data.team_id, now)).length, 0);
+    countCanBeDirectlyAdded = Math.max(
+      limit -
+        (await getConcurrencyLimitActiveJobs(jobs[0].data.team_id, now)).length,
+      0,
+    );
   }
   const addToBull = jobs.slice(0, countCanBeDirectlyAdded);
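The computation reformatted in the second hunk is a headroom check: the number of jobs allowed to bypass the concurrency queue is the plan's limit minus the currently active jobs, floored at zero, and the rest of the batch is deferred. A minimal sketch (names are illustrative):

    // Decide how many incoming jobs may run immediately under a concurrency cap.
    function splitByHeadroom<T>(
      jobs: T[],
      limit: number,
      activeCount: number,
    ): { runNow: T[]; defer: T[] } {
      const headroom = Math.max(limit - activeCount, 0);
      return {
        runNow: jobs.slice(0, headroom), // straight to the work queue
        defer: jobs.slice(headroom), // parked until capacity frees up
      };
    }

    // With limit 10 and 7 active, only the first 3 jobs of a batch run now.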

View File

@@ -496,7 +496,7 @@ async function processJob(job: Job & { id: string }, token: string) {
   // See lockURL
   const x = await redisConnection.sadd(
     "crawl:" + job.data.crawl_id + ":visited",
-    ...p1.map(x => x.href),
+    ...p1.map((x) => x.href),
   );
   const lockRes = x === p1.length;
@@ -504,7 +504,6 @@ async function processJob(job: Job & { id: string }, token: string) {
         throw new RacedRedirectError();
       }
     }
   }
-
   logger.debug("Logging job to DB...");
@@ -675,7 +674,10 @@ async function processJob(job: Job & { id: string }, token: string) {
   logger.debug("Declaring job as done...");
   await addCrawlJobDone(job.data.crawl_id, job.id, false);
-  await redisConnection.srem("crawl:" + job.data.crawl_id + ":visited_unique", normalizeURL(job.data.url, sc));
+  await redisConnection.srem(
+    "crawl:" + job.data.crawl_id + ":visited_unique",
+    normalizeURL(job.data.url, sc),
+  );
   logger.debug("Logging job to DB...");
   await logJob(
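One technique worth spelling out from the first hunk of this file: SADD returns how many of the offered members were newly added, so comparing that count against the number of URLs detects whether another worker already claimed any of them. A minimal sketch with ioredis (key layout is illustrative):

    import Redis from "ioredis";

    const redis = new Redis();

    // Atomically claim a batch of URLs for a crawl. SADD counts newly added
    // members, so a shortfall means some URL was already visited elsewhere.
    async function lockUrls(crawlId: string, urls: string[]): Promise<boolean> {
      const added = await redis.sadd("crawl:" + crawlId + ":visited", ...urls);
      return added === urls.length;
    }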