Nick: formatting fixes

Author: Nicolas (2025-01-10 18:35:10 -03:00)
Parent: d1f3b96388
Commit: f4d10c5031
37 changed files with 674 additions and 499 deletions
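
The diffs below are consistent with a Prettier pass using its default options: double quotes, trailing commas, parentheses around single arrow-function parameters, and wrapping near 80 columns. A minimal config sketch that would produce this style (the repository's actual Prettier settings are not part of this commit and are assumed here):

// prettier.config.mjs -- hypothetical; the values mirror Prettier 3 defaults,
// which match the formatting applied throughout this commit.
export default {
  semi: true, // keep semicolons
  singleQuote: false, // double quotes, as in the rewritten string literals
  trailingComma: "all", // trailing commas on multi-line calls and literals
  arrowParens: "always", // `url =>` becomes `(url) =>`
  printWidth: 80, // long calls and ternaries get wrapped
};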


@ -179,11 +179,15 @@ export async function crawlController(req: Request, res: Response) {
const sitemap = sc.crawlerOptions.ignoreSitemap
? 0
: await crawler.tryGetSitemap(async urls => {
: await crawler.tryGetSitemap(async (urls) => {
if (urls.length === 0) return;
let jobPriority = await getJobPriority({ plan, team_id, basePriority: 21 });
const jobs = urls.map(url => {
let jobPriority = await getJobPriority({
plan,
team_id,
basePriority: 21,
});
const jobs = urls.map((url) => {
const uuid = uuidv4();
return {
name: uuid,


@ -114,7 +114,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
const sitemap = sc.crawlerOptions?.ignoreSitemap
? 0
: await crawler.tryGetSitemap(async urls => {
: await crawler.tryGetSitemap(async (urls) => {
for (const url of urls) {
await lockURL(id, sc, url);
const jobId = uuidv4();


@ -115,7 +115,8 @@ export async function crawlStatusController(
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] =
sc.cancelled
? "cancelled"
: (validJobStatuses.every((x) => x[1] === "completed") && validJobStatuses.length > 0)
: validJobStatuses.every((x) => x[1] === "completed") &&
validJobStatuses.length > 0
? "completed"
: "scraping";


@ -7,11 +7,7 @@ import {
RequestWithAuth,
toLegacyCrawlerOptions,
} from "./types";
import {
crawlToCrawler,
saveCrawl,
StoredCrawl,
} from "../../lib/crawl-redis";
import { crawlToCrawler, saveCrawl, StoredCrawl } from "../../lib/crawl-redis";
import { logCrawl } from "../../services/logging/crawl_log";
import { _addScrapeJobToBullMQ } from "../../services/queue-jobs";
import { logger as _logger } from "../../lib/logger";
@ -103,7 +99,8 @@ export async function crawlController(
await saveCrawl(id, sc);
await _addScrapeJobToBullMQ({
await _addScrapeJobToBullMQ(
{
url: req.body.url,
mode: "kickoff" as const,
team_id: req.auth.team_id,
@ -115,7 +112,11 @@ export async function crawlController(
crawl_id: id,
webhook: req.body.webhook,
v1: true,
}, {}, crypto.randomUUID(), 10);
},
{},
crypto.randomUUID(),
10,
);
const protocol = process.env.ENV === "local" ? req.protocol : "https";


@ -11,7 +11,11 @@ import { saveExtract } from "../../lib/extract/extract-redis";
import { getTeamIdSyncB } from "../../lib/extract/team-id-sync";
import { performExtraction } from "../../lib/extract/extraction-service";
export async function oldExtract(req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, res: Response<ExtractResponse>, extractId: string){
export async function oldExtract(
req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>,
res: Response<ExtractResponse>,
extractId: string,
) {
// Means that are in the non-queue system
// TODO: Remove this once all teams have transitioned to the new system
try {
@ -29,7 +33,7 @@ export async function oldExtract(req: RequestWithAuth<{}, ExtractResponse, Extra
error: "Internal server error",
});
}
}
}
/**
* Extracts data from the provided URLs based on the request parameters.
* Currently in beta.
@ -53,7 +57,10 @@ export async function extractController(
extractId,
};
if(await getTeamIdSyncB(req.auth.team_id) && req.body.origin !== "api-sdk") {
if (
(await getTeamIdSyncB(req.auth.team_id)) &&
req.body.origin !== "api-sdk"
) {
return await oldExtract(req, res, extractId);
}


@ -86,11 +86,15 @@ export async function getMapResults({
// If sitemapOnly is true, only get links from sitemap
if (crawlerOptions.sitemapOnly) {
const sitemap = await crawler.tryGetSitemap(urls => {
const sitemap = await crawler.tryGetSitemap(
(urls) => {
urls.forEach((x) => {
links.push(x);
});
}, true, true);
},
true,
true,
);
if (sitemap > 0) {
links = links
.slice(1)
@ -145,7 +149,9 @@ export async function getMapResults({
// Parallelize sitemap fetch with serper search
const [_, ...searchResults] = await Promise.all([
ignoreSitemap ? null : crawler.tryGetSitemap(urls => {
ignoreSitemap
? null
: crawler.tryGetSitemap((urls) => {
links.push(...urls);
}, true),
...(cachedResult ? [] : pagePromises),
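
Across these call sites, tryGetSitemap now streams batches of discovered URLs to a callback and resolves to the number of sitemap entries found. The signature implied by the calls in this commit, reconstructed from usage rather than taken from the actual declaration:

// Assumed shape only; the parameter names fromMap and onlySitemap come from
// the WebCrawler hunk later in this commit, and the numeric return value from
// the `sitemap > 0` checks above.
interface SitemapCapableCrawler {
  tryGetSitemap(
    urlsHandler: (urls: string[]) => unknown,
    fromMap?: boolean,
    onlySitemap?: boolean,
  ): Promise<number>;
}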


@ -18,7 +18,10 @@ export async function scrapeStatusController(req: any, res: any) {
const job = await supabaseGetJobByIdOnlyData(req.params.jobId);
if (!allowedTeams.includes(job?.team_id) || job?.team_id !== req.auth.team_id) {
if (
!allowedTeams.includes(job?.team_id) ||
job?.team_id !== req.auth.team_id
) {
return res.status(403).json({
success: false,
error: "You are not allowed to access this resource.",


@ -200,7 +200,8 @@ export const extractV1Options = z
schema: z
.any()
.optional()
.refine((val) => {
.refine(
(val) => {
if (!val) return true; // Allow undefined schema
try {
const validate = ajv.compile(val);
@ -208,9 +209,11 @@ export const extractV1Options = z
} catch (e) {
return false;
}
}, {
},
{
message: "Invalid JSON schema.",
}),
},
),
limit: z.number().int().positive().finite().safe().optional(),
ignoreSitemap: z.boolean().default(false),
includeSubdomains: z.boolean().default(true),
@ -452,7 +455,7 @@ export type Document = {
description: string;
url: string;
};
}
};
export type ErrorResponse = {
success: false;
@ -477,7 +480,7 @@ export interface ScrapeResponseRequestTest {
export interface URLTrace {
url: string;
status: 'mapped' | 'scraped' | 'error';
status: "mapped" | "scraped" | "error";
timing: {
discoveredAt: string;
scrapedAt?: string;
@ -785,9 +788,18 @@ export function toLegacyDocument(
};
}
export const searchRequestSchema = z.object({
export const searchRequestSchema = z
.object({
query: z.string(),
limit: z.number().int().positive().finite().safe().max(10).optional().default(5),
limit: z
.number()
.int()
.positive()
.finite()
.safe()
.max(10)
.optional()
.default(5),
tbs: z.string().optional(),
filter: z.string().optional(),
lang: z.string().optional().default("en"),
@ -795,18 +807,27 @@ export const searchRequestSchema = z.object({
location: z.string().optional(),
origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000),
scrapeOptions: scrapeOptions.extend({
formats: z.array(z.enum([
scrapeOptions: scrapeOptions
.extend({
formats: z
.array(
z.enum([
"markdown",
"html",
"rawHtml",
"links",
"screenshot",
"screenshot@fullPage",
"extract"
])).default([])
}).default({}),
}).strict("Unrecognized key in body -- please review the v1 API documentation for request body changes");
"extract",
]),
)
.default([]),
})
.default({}),
})
.strict(
"Unrecognized key in body -- please review the v1 API documentation for request body changes",
);
export type SearchRequest = z.infer<typeof searchRequestSchema>;
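
A short usage sketch of the reformatted schema, assuming the fields not shown in this hunk are optional or defaulted:

// Hypothetical inputs; the defaults and the strict() message come from the
// schema above.
const ok = searchRequestSchema.parse({ query: "firecrawl" });
// Defaults are applied: ok.limit === 5, ok.lang === "en", ok.origin === "api",
// ok.timeout === 60000, and ok.scrapeOptions is filled from its own defaults.

const bad = searchRequestSchema.safeParse({ query: "firecrawl", foo: true });
// bad.success === false: strict() rejects the unrecognized "foo" key with the
// "Unrecognized key in body" message.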


@ -45,7 +45,10 @@ const serverAdapter = new ExpressAdapter();
serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`);
const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
queues: [new BullAdapter(getScrapeQueue()), new BullAdapter(getExtractQueue())],
queues: [
new BullAdapter(getScrapeQueue()),
new BullAdapter(getExtractQueue()),
],
serverAdapter: serverAdapter,
});


@ -1,91 +1,89 @@
import { normalizeUrl, normalizeUrlOnlyHostname } from './canonical-url';
import { normalizeUrl, normalizeUrlOnlyHostname } from "./canonical-url";
describe('normalizeUrlOnlyHostname', () => {
it('should remove protocol and www from URL', () => {
const url = 'https://www.example.com';
const expected = 'example.com';
describe("normalizeUrlOnlyHostname", () => {
it("should remove protocol and www from URL", () => {
const url = "https://www.example.com";
const expected = "example.com";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should remove only protocol if www is not present', () => {
const url = 'https://example.com';
const expected = 'example.com';
it("should remove only protocol if www is not present", () => {
const url = "https://example.com";
const expected = "example.com";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs without protocol', () => {
const url = 'www.example.com';
const expected = 'example.com';
it("should handle URLs without protocol", () => {
const url = "www.example.com";
const expected = "example.com";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs without protocol and www', () => {
const url = 'example.com';
const expected = 'example.com';
it("should handle URLs without protocol and www", () => {
const url = "example.com";
const expected = "example.com";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs with paths', () => {
const url = 'https://www.example.com/path/to/resource';
const expected = 'example.com';
it("should handle URLs with paths", () => {
const url = "https://www.example.com/path/to/resource";
const expected = "example.com";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle invalid URLs gracefully', () => {
const url = 'not a valid url';
const expected = 'not a valid url';
it("should handle invalid URLs gracefully", () => {
const url = "not a valid url";
const expected = "not a valid url";
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
});
describe('normalizeUrl', () => {
it('should remove protocol and www from URL', () => {
const url = 'https://www.example.com';
const expected = 'example.com';
describe("normalizeUrl", () => {
it("should remove protocol and www from URL", () => {
const url = "https://www.example.com";
const expected = "example.com";
expect(normalizeUrl(url)).toBe(expected);
});
it('should remove only protocol if www is not present', () => {
const url = 'https://example.com';
const expected = 'example.com';
it("should remove only protocol if www is not present", () => {
const url = "https://example.com";
const expected = "example.com";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs without protocol', () => {
const url = 'www.example.com';
const expected = 'example.com';
it("should handle URLs without protocol", () => {
const url = "www.example.com";
const expected = "example.com";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs without protocol and www', () => {
const url = 'example.com';
const expected = 'example.com';
it("should handle URLs without protocol and www", () => {
const url = "example.com";
const expected = "example.com";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs with paths', () => {
const url = 'https://www.example.com/path/to/resource';
const expected = 'example.com/path/to/resource';
it("should handle URLs with paths", () => {
const url = "https://www.example.com/path/to/resource";
const expected = "example.com/path/to/resource";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs with trailing slash', () => {
const url = 'https://www.example.com/';
const expected = 'example.com';
it("should handle URLs with trailing slash", () => {
const url = "https://www.example.com/";
const expected = "example.com";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs with trailing slash and path', () => {
const url = 'https://www.example.com/path/';
const expected = 'example.com/path';
it("should handle URLs with trailing slash and path", () => {
const url = "https://www.example.com/path/";
const expected = "example.com/path";
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle invalid URLs gracefully', () => {
const url = 'not a valid url';
const expected = 'not a valid url';
it("should handle invalid URLs gracefully", () => {
const url = "not a valid url";
const expected = "not a valid url";
expect(normalizeUrl(url)).toBe(expected);
});
});
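
The expectations above pin down the normalization behavior. A minimal sketch of an implementation that satisfies these tests (the real canonical-url module may handle more cases):

// Sketch only; written to match the test expectations above.
export function normalizeUrl(url: string): string {
  return url
    .replace(/^https?:\/\//, "") // drop the protocol
    .replace(/^www\./, "") // drop a leading www.
    .replace(/\/+$/, ""); // drop trailing slashes
}

export function normalizeUrlOnlyHostname(url: string): string {
  // Keep only the hostname portion of the normalized URL.
  return normalizeUrl(url).split("/")[0];
}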


@ -322,7 +322,7 @@ export async function lockURLs(
export async function lockURLsIndividually(
id: string,
sc: StoredCrawl,
jobs: { id: string; url: string; }[],
jobs: { id: string; url: string }[],
) {
const out: typeof jobs = [];


@ -6,6 +6,4 @@ export const extractConfig = {
MIN_REQUIRED_LINKS: 1,
};
export const CUSTOM_U_TEAMS = [
"874d40cc-a5c0-4e93-b661-9ddfbad5e51e"
]
export const CUSTOM_U_TEAMS = ["874d40cc-a5c0-4e93-b661-9ddfbad5e51e"];


@ -21,14 +21,19 @@ export async function getExtract(id: string): Promise<StoredExtract | null> {
return x ? JSON.parse(x) : null;
}
export async function updateExtract(id: string, extract: Partial<StoredExtract>) {
export async function updateExtract(
id: string,
extract: Partial<StoredExtract>,
) {
const current = await getExtract(id);
if (!current) return;
await redisConnection.set("extract:" + id, JSON.stringify({ ...current, ...extract }));
await redisConnection.set(
"extract:" + id,
JSON.stringify({ ...current, ...extract }),
);
await redisConnection.expire("extract:" + id, 24 * 60 * 60, "NX");
}
export async function getExtractExpiry(id: string): Promise<Date> {
const d = new Date();
const ttl = await redisConnection.pttl("extract:" + id);


@ -30,7 +30,7 @@ interface ExtractResult {
function getRootDomain(url: string): string {
try {
if(url.endsWith("/*")) {
if (url.endsWith("/*")) {
url = url.slice(0, -2);
}
const urlObj = new URL(url);
@ -40,14 +40,18 @@ function getRootDomain(url: string): string {
}
}
export async function performExtraction(extractId: string, options: ExtractServiceOptions): Promise<ExtractResult> {
export async function performExtraction(
extractId: string,
options: ExtractServiceOptions,
): Promise<ExtractResult> {
const { request, teamId, plan, subId } = options;
const urlTraces: URLTrace[] = [];
let docs: Document[] = [];
// Process URLs
const urlPromises = request.urls.map(url =>
processUrl({
const urlPromises = request.urls.map((url) =>
processUrl(
{
url,
prompt: request.prompt,
teamId,
@ -56,16 +60,19 @@ export async function performExtraction(extractId: string, options: ExtractServi
origin: request.origin,
limit: request.limit,
includeSubdomains: request.includeSubdomains,
}, urlTraces)
},
urlTraces,
),
);
const processedUrls = await Promise.all(urlPromises);
const links = processedUrls.flat().filter(url => url);
const links = processedUrls.flat().filter((url) => url);
if (links.length === 0) {
return {
success: false,
error: "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
error:
"No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
extractId,
urlTrace: urlTraces,
};
@ -73,14 +80,17 @@ export async function performExtraction(extractId: string, options: ExtractServi
// Scrape documents
const timeout = Math.floor((request.timeout || 40000) * 0.7) || 30000;
const scrapePromises = links.map(url =>
scrapeDocument({
const scrapePromises = links.map((url) =>
scrapeDocument(
{
url,
teamId,
plan,
origin: request.origin || "api",
timeout,
}, urlTraces)
},
urlTraces,
),
);
try {
@ -114,13 +124,16 @@ export async function performExtraction(extractId: string, options: ExtractServi
// Update token usage in traces
if (completions.numTokens) {
const totalLength = docs.reduce((sum, doc) => sum + (doc.markdown?.length || 0), 0);
const totalLength = docs.reduce(
(sum, doc) => sum + (doc.markdown?.length || 0),
0,
);
docs.forEach((doc) => {
if (doc.metadata?.sourceURL) {
const trace = urlTraces.find((t) => t.url === doc.metadata.sourceURL);
if (trace && trace.contentStats) {
trace.contentStats.tokensUsed = Math.floor(
((doc.markdown?.length || 0) / totalLength) * completions.numTokens
((doc.markdown?.length || 0) / totalLength) * completions.numTokens,
);
}
}
@ -185,7 +198,7 @@ export async function performExtraction(extractId: string, options: ExtractServi
let linksBilled = links.length * 5;
if(CUSTOM_U_TEAMS.includes(teamId)){
if (CUSTOM_U_TEAMS.includes(teamId)) {
linksBilled = 1;
}
// Bill team for usage
@ -213,12 +226,12 @@ export async function performExtraction(extractId: string, options: ExtractServi
updateExtract(extractId, {
status: "completed",
}).catch((error) => {
logger.error(`Failed to update extract ${extractId} status to completed: ${error}`);
logger.error(
`Failed to update extract ${extractId} status to completed: ${error}`,
);
});
});
return {
success: true,
data: completions.extract ?? {},


@ -1,6 +1,6 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { Document } from '../../../controllers/v1/types';
import { logger } from '../../logger';
import { Pinecone } from "@pinecone-database/pinecone";
import { Document } from "../../../controllers/v1/types";
import { logger } from "../../logger";
import OpenAI from "openai";
const openai = new OpenAI({
@ -48,34 +48,43 @@ export async function indexPage({
document,
originUrl,
crawlId,
teamId
teamId,
}: {
document: Document;
originUrl: string;
crawlId?: string;
teamId?: string;
}
) {
}) {
try {
const index = pinecone.index(INDEX_NAME);
// Trim markdown if it's too long
let trimmedMarkdown = document.markdown;
if (trimmedMarkdown && Buffer.byteLength(trimmedMarkdown, 'utf-8') > MAX_METADATA_SIZE) {
trimmedMarkdown = trimmedMarkdown.slice(0, Math.floor(MAX_METADATA_SIZE / 2)); // Using half the size to be safe with UTF-8 encoding
if (
trimmedMarkdown &&
Buffer.byteLength(trimmedMarkdown, "utf-8") > MAX_METADATA_SIZE
) {
trimmedMarkdown = trimmedMarkdown.slice(
0,
Math.floor(MAX_METADATA_SIZE / 2),
); // Using half the size to be safe with UTF-8 encoding
}
// Create text to embed
const textToEmbed = [
document.metadata.title,
document.metadata.description,
trimmedMarkdown
].filter(Boolean).join('\n\n');
trimmedMarkdown,
]
.filter(Boolean)
.join("\n\n");
// Get embedding from OpenAI
const embedding = await getEmbedding(textToEmbed);
const normalizedUrl = normalizeUrl(document.metadata.sourceURL || document.metadata.url!);
const normalizedUrl = normalizeUrl(
document.metadata.sourceURL || document.metadata.url!,
);
// Prepare metadata
const metadata: PageMetadata = {
@ -86,29 +95,30 @@ export async function indexPage({
crawlId,
teamId,
markdown: trimmedMarkdown,
timestamp: Date.now()
timestamp: Date.now(),
};
// Upsert to Pinecone
await index.upsert([{
await index.upsert([
{
id: normalizedUrl,
values: embedding,
metadata: {
...metadata,
[document.metadata.sourceURL || document.metadata.url!]: true
}
}]);
[document.metadata.sourceURL || document.metadata.url!]: true,
},
},
]);
logger.debug('Successfully indexed page in Pinecone', {
logger.debug("Successfully indexed page in Pinecone", {
url: metadata.url,
crawlId
crawlId,
});
} catch (error) {
logger.error('Failed to index page in Pinecone', {
logger.error("Failed to index page in Pinecone", {
error,
url: document.metadata.sourceURL || document.metadata.url,
crawlId
crawlId,
});
}
}
@ -116,7 +126,7 @@ export async function indexPage({
export async function searchSimilarPages(
query: string,
originUrl?: string,
limit: number = 10
limit: number = 10,
) {
try {
const index = pinecone.index(INDEX_NAME);
@ -127,31 +137,30 @@ export async function searchSimilarPages(
const queryParams: any = {
vector: queryEmbedding,
topK: limit,
includeMetadata: true
includeMetadata: true,
};
const normalizedOriginUrl = originUrl ? normalizeUrl(originUrl) : undefined;
// Add filter if originUrl is provided
if (normalizedOriginUrl) {
queryParams.filter = {
originUrl: { $eq: normalizedOriginUrl }
originUrl: { $eq: normalizedOriginUrl },
};
}
const results = await index.query(queryParams);
return results.matches.map(match => ({
return results.matches.map((match) => ({
url: match.metadata?.url,
title: match.metadata?.title,
description: match.metadata?.description,
score: match.score,
markdown: match.metadata?.markdown
markdown: match.metadata?.markdown,
}));
} catch (error) {
logger.error('Failed to search similar pages in Pinecone', {
logger.error("Failed to search similar pages in Pinecone", {
error,
query,
originUrl
originUrl,
});
return [];
}


@ -9,8 +9,6 @@ const cohere = new CohereClient({
token: process.env.COHERE_API_KEY,
});
interface RankingResult {
mappedLinks: MapDocument[];
linksAndScores: {
@ -59,7 +57,6 @@ export async function rerankLinks(
searchQuery,
);
// First try with high threshold
let filteredLinks = filterAndProcessLinks(
mappedLinks,
@ -67,8 +64,6 @@ export async function rerankLinks(
extractConfig.INITIAL_SCORE_THRESHOLD,
);
// If we don't have enough high-quality links, try with lower threshold
if (filteredLinks.length < extractConfig.MIN_REQUIRED_LINKS) {
logger.info(
@ -102,7 +97,7 @@ export async function rerankLinks(
if (trace) {
trace.relevanceScore = score.score;
// If URL didn't make it through filtering, mark it as filtered out
if (!filteredLinks.some(link => link.url === score.link)) {
if (!filteredLinks.some((link) => link.url === score.link)) {
trace.warning = `Relevance score ${score.score} below threshold`;
trace.usedInCompletion = false;
}
@ -112,18 +107,18 @@ export async function rerankLinks(
const rankedLinks = filteredLinks.slice(0, extractConfig.MAX_RANKING_LIMIT);
// Mark URLs that will be used in completion
rankedLinks.forEach(link => {
const trace = urlTraces.find(t => t.url === link.url);
rankedLinks.forEach((link) => {
const trace = urlTraces.find((t) => t.url === link.url);
if (trace) {
trace.usedInCompletion = true;
}
});
// Mark URLs that were dropped due to ranking limit
filteredLinks.slice(extractConfig.MAX_RANKING_LIMIT).forEach(link => {
const trace = urlTraces.find(t => t.url === link.url);
filteredLinks.slice(extractConfig.MAX_RANKING_LIMIT).forEach((link) => {
const trace = urlTraces.find((t) => t.url === link.url);
if (trace) {
trace.warning = 'Excluded due to ranking limit';
trace.warning = "Excluded due to ranking limit";
trace.usedInCompletion = false;
}
});


@ -20,10 +20,13 @@ interface ProcessUrlOptions {
includeSubdomains?: boolean;
}
export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace[]): Promise<string[]> {
export async function processUrl(
options: ProcessUrlOptions,
urlTraces: URLTrace[],
): Promise<string[]> {
const trace: URLTrace = {
url: options.url,
status: 'mapped',
status: "mapped",
timing: {
discoveredAt: new Date().toISOString(),
},
@ -35,8 +38,8 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
trace.usedInCompletion = true;
return [options.url];
}
trace.status = 'error';
trace.error = 'URL is blocked';
trace.status = "error";
trace.error = "URL is blocked";
trace.usedInCompletion = false;
return [];
}
@ -46,9 +49,10 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
let rephrasedPrompt = options.prompt;
if (options.prompt) {
rephrasedPrompt = await generateBasicCompletion(
buildRefrasedPrompt(options.prompt, baseUrl)
) ?? options.prompt;
rephrasedPrompt =
(await generateBasicCompletion(
buildRefrasedPrompt(options.prompt, baseUrl),
)) ?? options.prompt;
}
try {
@ -70,11 +74,11 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
let uniqueUrls = removeDuplicateUrls(allUrls);
// Track all discovered URLs
uniqueUrls.forEach(discoveredUrl => {
if (!urlTraces.some(t => t.url === discoveredUrl)) {
uniqueUrls.forEach((discoveredUrl) => {
if (!urlTraces.some((t) => t.url === discoveredUrl)) {
urlTraces.push({
url: discoveredUrl,
status: 'mapped',
status: "mapped",
timing: {
discoveredAt: new Date().toISOString(),
},
@ -102,12 +106,12 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
uniqueUrls = removeDuplicateUrls(allUrls);
// Track all discovered URLs
uniqueUrls.forEach(discoveredUrl => {
if (!urlTraces.some(t => t.url === discoveredUrl)) {
uniqueUrls.forEach((discoveredUrl) => {
if (!urlTraces.some((t) => t.url === discoveredUrl)) {
urlTraces.push({
url: discoveredUrl,
status: 'mapped',
warning: 'Broader search. Not limiting map results to prompt.',
status: "mapped",
warning: "Broader search. Not limiting map results to prompt.",
timing: {
discoveredAt: new Date().toISOString(),
},
@ -118,11 +122,11 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
}
// Track all discovered URLs
uniqueUrls.forEach(discoveredUrl => {
if (!urlTraces.some(t => t.url === discoveredUrl)) {
uniqueUrls.forEach((discoveredUrl) => {
if (!urlTraces.some((t) => t.url === discoveredUrl)) {
urlTraces.push({
url: discoveredUrl,
status: 'mapped',
status: "mapped",
timing: {
discoveredAt: new Date().toISOString(),
},
@ -155,9 +159,9 @@ export async function processUrl(options: ProcessUrlOptions, urlTraces: URLTrace
mappedLinks = await rerankLinks(mappedLinks, searchQuery, urlTraces);
}
return mappedLinks.map(x => x.url);
return mappedLinks.map((x) => x.url);
} catch (error) {
trace.status = 'error';
trace.status = "error";
trace.error = error.message;
trace.usedInCompletion = false;
return [];


@ -42,11 +42,18 @@ export const logger = winston.createLogger({
},
}),
transports: [
...(process.env.FIRECRAWL_LOG_TO_FILE ? [
...(process.env.FIRECRAWL_LOG_TO_FILE
? [
new winston.transports.File({
filename: "firecrawl-" + (process.argv[1].includes("worker") ? "worker" : "app") + "-" + crypto.randomUUID() + ".log",
})
] : []),
filename:
"firecrawl-" +
(process.argv[1].includes("worker") ? "worker" : "app") +
"-" +
crypto.randomUUID() +
".log",
}),
]
: []),
new winston.transports.Console({
format: winston.format.combine(
winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),


@ -175,11 +175,10 @@ export async function runWebScraper({
}
// If the team is the background index team, return the response
if(team_id === process.env.BACKGROUND_INDEX_TEAM_ID!) {
if (team_id === process.env.BACKGROUND_INDEX_TEAM_ID!) {
return response;
}
billTeam(team_id, undefined, creditsToBeBilled, logger).catch((error) => {
logger.error(
`Failed to bill team ${team_id} for ${creditsToBeBilled} credits`,


@ -192,7 +192,8 @@ v1Router.get(
wrap((req: any, res): any => crawlStatusController(req, res, true)),
);
v1Router.get("/scrape/:jobId",
v1Router.get(
"/scrape/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
wrap(scrapeStatusController),
);
@ -242,6 +243,3 @@ v1Router.get(
authMiddleware(RateLimiterMode.CrawlStatus),
wrap(creditUsageController),
);


@ -219,18 +219,29 @@ export class WebCrawler {
const _urlsHandler = async (urls: string[]) => {
let uniqueURLs: string[] = [];
for (const url of urls) {
if (await redisConnection.sadd("sitemap:" + this.jobId + ":links", normalizeUrl(url))) {
if (
await redisConnection.sadd(
"sitemap:" + this.jobId + ":links",
normalizeUrl(url),
)
) {
uniqueURLs.push(url);
}
}
await redisConnection.expire("sitemap:" + this.jobId + ":links", 3600, "NX");
await redisConnection.expire(
"sitemap:" + this.jobId + ":links",
3600,
"NX",
);
if (uniqueURLs.length > 0) {
urlsHandler(uniqueURLs);
}
};
let count = await this.tryFetchSitemapLinks(this.initialUrl, (urls: string[]) => {
let count = await this.tryFetchSitemapLinks(
this.initialUrl,
(urls: string[]) => {
if (fromMap && onlySitemap) {
return urlsHandler(urls);
} else {
@ -243,10 +254,16 @@ export class WebCrawler {
leftOfLimit -= filteredLinks.length;
return _urlsHandler(filteredLinks);
}
});
},
);
if (count > 0) {
if (await redisConnection.sadd("sitemap:" + this.jobId + ":links", normalizeUrl(this.initialUrl))) {
if (
await redisConnection.sadd(
"sitemap:" + this.jobId + ":links",
normalizeUrl(this.initialUrl),
)
) {
urlsHandler([this.initialUrl]);
}
count++;
@ -470,8 +487,13 @@ export class WebCrawler {
return socialMediaOrEmail.some((ext) => url.includes(ext));
}
private async tryFetchSitemapLinks(url: string, urlsHandler: (urls: string[]) => unknown): Promise<number> {
const sitemapUrl = url.endsWith(".xml") ? url : `${url}${url.endsWith("/") ? "" : "/"}sitemap.xml`;
private async tryFetchSitemapLinks(
url: string,
urlsHandler: (urls: string[]) => unknown,
): Promise<number> {
const sitemapUrl = url.endsWith(".xml")
? url
: `${url}${url.endsWith("/") ? "" : "/"}sitemap.xml`;
let sitemapCount: number = 0;
@ -482,37 +504,43 @@ export class WebCrawler {
this.logger,
);
} catch (error) {
this.logger.debug(
`Failed to fetch sitemap from ${sitemapUrl}`,
{ method: "tryFetchSitemapLinks", sitemapUrl, error },
);
this.logger.debug(`Failed to fetch sitemap from ${sitemapUrl}`, {
method: "tryFetchSitemapLinks",
sitemapUrl,
error,
});
}
// If this is a subdomain, also try to get sitemap from the main domain
try {
const urlObj = new URL(url);
const hostname = urlObj.hostname;
const domainParts = hostname.split('.');
const domainParts = hostname.split(".");
// Check if this is a subdomain (has more than 2 parts and not www)
if (domainParts.length > 2 && domainParts[0] !== 'www') {
if (domainParts.length > 2 && domainParts[0] !== "www") {
// Get the main domain by taking the last two parts
const mainDomain = domainParts.slice(-2).join('.');
const mainDomain = domainParts.slice(-2).join(".");
const mainDomainUrl = `${urlObj.protocol}//${mainDomain}`;
const mainDomainSitemapUrl = `${mainDomainUrl}/sitemap.xml`;
try {
// Get all links from the main domain's sitemap
sitemapCount += await getLinksFromSitemap(
{ sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) {
return urlsHandler(urls.filter(link => {
{
sitemapUrl: mainDomainSitemapUrl,
urlsHandler(urls) {
return urlsHandler(
urls.filter((link) => {
try {
const linkUrl = new URL(link);
return linkUrl.hostname.endsWith(hostname);
} catch {
}
}))
}, mode: "fire-engine" },
} catch {}
}),
);
},
mode: "fire-engine",
},
this.logger,
);
} catch (error) {


@ -15,7 +15,7 @@ export async function getLinksFromSitemap(
mode = "axios",
}: {
sitemapUrl: string;
urlsHandler(urls: string[]): unknown,
urlsHandler(urls: string[]): unknown;
mode?: "axios" | "fire-engine";
},
logger: Logger,
@ -31,7 +31,10 @@ export async function getLinksFromSitemap(
{ forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true },
);
if (!response.success) {
logger.debug("Failed to scrape sitemap via TLSClient, falling back to axios...", { error: response.error })
logger.debug(
"Failed to scrape sitemap via TLSClient, falling back to axios...",
{ error: response.error },
);
const ar = await axios.get(sitemapUrl, { timeout: axiosTimeout });
content = ar.data;
} else {
@ -63,14 +66,11 @@ export async function getLinksFromSitemap(
.map((sitemap) => sitemap.loc[0].trim());
const sitemapPromises: Promise<number>[] = sitemapUrls.map((sitemapUrl) =>
getLinksFromSitemap(
{ sitemapUrl, urlsHandler, mode },
logger,
),
getLinksFromSitemap({ sitemapUrl, urlsHandler, mode }, logger),
);
const results = await Promise.all(sitemapPromises);
count = results.reduce((a,x) => a + x)
count = results.reduce((a, x) => a + x);
} else if (root && root.url) {
// Check if any URLs point to additional sitemaps
const xmlSitemaps: string[] = root.url
@ -78,7 +78,7 @@ export async function getLinksFromSitemap(
(url) =>
url.loc &&
url.loc.length > 0 &&
url.loc[0].trim().toLowerCase().endsWith('.xml')
url.loc[0].trim().toLowerCase().endsWith(".xml"),
)
.map((url) => url.loc[0].trim());
@ -90,7 +90,10 @@ export async function getLinksFromSitemap(
logger,
),
);
count += (await Promise.all(sitemapPromises)).reduce((a,x) => a + x, 0);
count += (await Promise.all(sitemapPromises)).reduce(
(a, x) => a + x,
0,
);
}
const validUrls = root.url
@ -98,7 +101,7 @@ export async function getLinksFromSitemap(
(url) =>
url.loc &&
url.loc.length > 0 &&
!url.loc[0].trim().toLowerCase().endsWith('.xml') &&
!url.loc[0].trim().toLowerCase().endsWith(".xml") &&
!WebCrawler.prototype.isFile(url.loc[0].trim()),
)
.map((url) => url.loc[0].trim());


@ -3,7 +3,10 @@ import { EngineScrapeResult } from "..";
import { Meta } from "../..";
import { TimeoutError } from "../../error";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
import { InsecureConnectionError, makeSecureDispatcher } from "../utils/safeFetch";
import {
InsecureConnectionError,
makeSecureDispatcher,
} from "../utils/safeFetch";
export async function scrapeURLWithFetch(
meta: Meta,
@ -20,7 +23,9 @@ export async function scrapeURLWithFetch(
headers: meta.options.headers,
}),
(async () => {
await new Promise((resolve) => setTimeout(() => resolve(null), timeout));
await new Promise((resolve) =>
setTimeout(() => resolve(null), timeout),
);
throw new TimeoutError(
"Fetch was unable to scrape the page before timing out",
{ cause: { timeout } },
@ -28,7 +33,10 @@ export async function scrapeURLWithFetch(
})(),
]);
} catch (error) {
if (error instanceof TypeError && error.cause instanceof InsecureConnectionError) {
if (
error instanceof TypeError &&
error.cause instanceof InsecureConnectionError
) {
throw error.cause;
} else {
throw error;
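
The fetch engine races the request against a timer and converts a win by the timer into a TimeoutError. The same pattern as a generic, self-contained sketch:

// Generic sketch of the race-against-a-timer pattern used above; the timer is
// not cancelled when the work finishes first, matching the original code.
async function withTimeout<T>(work: Promise<T>, timeoutMs: number): Promise<T> {
  return await Promise.race([
    work,
    (async () => {
      await new Promise((resolve) => setTimeout(resolve, timeoutMs));
      throw new Error(`Timed out after ${timeoutMs} ms`);
    })(),
  ]);
}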


@ -3,7 +3,12 @@ import * as Sentry from "@sentry/node";
import { z } from "zod";
import { robustFetch } from "../../lib/fetch";
import { ActionError, EngineError, SiteError, UnsupportedFileError } from "../../error";
import {
ActionError,
EngineError,
SiteError,
UnsupportedFileError,
} from "../../error";
const successSchema = z.object({
jobId: z.string(),
@ -37,10 +42,13 @@ const successSchema = z.object({
.optional(),
// chrome-cdp only -- file download handler
file: z.object({
file: z
.object({
name: z.string(),
content: z.string(),
}).optional().or(z.null()),
})
.optional()
.or(z.null()),
});
export type FireEngineCheckStatusSuccess = z.infer<typeof successSchema>;
@ -121,7 +129,9 @@ export async function fireEngineCheckStatus(
typeof status.error === "string" &&
status.error.includes("File size exceeds")
) {
throw new UnsupportedFileError("File size exceeds " + status.error.split("File size exceeds ")[1]);
throw new UnsupportedFileError(
"File size exceeds " + status.error.split("File size exceeds ")[1],
);
} else if (
typeof status.error === "string" &&
// TODO: improve this later


@ -13,7 +13,13 @@ import {
FireEngineCheckStatusSuccess,
StillProcessingError,
} from "./checkStatus";
import { ActionError, EngineError, SiteError, TimeoutError, UnsupportedFileError } from "../../error";
import {
ActionError,
EngineError,
SiteError,
TimeoutError,
UnsupportedFileError,
} from "../../error";
import * as Sentry from "@sentry/node";
import { Action } from "../../../../lib/entities";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";


@ -38,7 +38,7 @@ const useCache =
process.env.CACHE_REDIS_URL !== undefined;
export const engines: Engine[] = [
...(useCache ? [ "cache" as const ] : []),
...(useCache ? ["cache" as const] : []),
...(useFireEngine
? [
"fire-engine;chrome-cdp" as const,
@ -298,7 +298,6 @@ export function buildFallbackList(meta: Meta): {
engine: Engine;
unsupportedFeatures: Set<FeatureFlag>;
}[] {
if (meta.internalOptions.useCache !== true) {
const cacheIndex = engines.indexOf("cache");
if (cacheIndex !== -1) {


@ -7,11 +7,18 @@ import { v4 as uuid } from "uuid";
import * as undici from "undici";
import { makeSecureDispatcher } from "./safeFetch";
export async function fetchFileToBuffer(url: string, init?: undici.RequestInit): Promise<{
export async function fetchFileToBuffer(
url: string,
init?: undici.RequestInit,
): Promise<{
response: undici.Response;
buffer: Buffer;
}> {
const response = await undici.fetch(url, { ...init, redirect: "follow", dispatcher: await makeSecureDispatcher(url) });
const response = await undici.fetch(url, {
...init,
redirect: "follow",
dispatcher: await makeSecureDispatcher(url),
});
return {
response,
buffer: Buffer.from(await response.arrayBuffer()),
@ -30,7 +37,11 @@ export async function downloadFile(
const tempFileWrite = createWriteStream(tempFilePath);
// TODO: maybe we could use tlsclient for this? for proxying
const response = await undici.fetch(url, { ...init, redirect: "follow", dispatcher: await makeSecureDispatcher(url) });
const response = await undici.fetch(url, {
...init,
redirect: "follow",
dispatcher: await makeSecureDispatcher(url),
});
// This should never happen in the current state of JS/Undici (2024), but let's check anyways.
if (response.body === null) {


@ -5,36 +5,44 @@ import { Address6 } from "ip-address";
export class InsecureConnectionError extends Error {
constructor() {
super("Connection violated security rules.")
super("Connection violated security rules.");
}
}
function isIPv4Private(address: string): boolean {
const parts = address.split(".").map(x => parseInt(x, 10));
return parts[0] === 0 // Current (local, "this") network
|| parts[0] === 10 // Used for local communications within a private network
|| (parts[0] === 100 && parts[1] >= 64 && parts[1] < 128) // Shared address space for communications between a service provider and its subscribers when using a carrier-grade NAT
|| parts[0] === 127 // Used for loopback addresses to the local host
|| (parts[0] === 169 && parts[1] === 254) // Used for link-local addresses between two hosts on a single link when no IP address is otherwise specified, such as would have normally been retrieved from a DHCP server
|| (parts[0] === 127 && parts[1] >= 16 && parts[2] < 32) // Used for local communications within a private network
|| (parts[0] === 192 && parts[1] === 0 && parts[2] === 0) // IETF Porotocol Assignments, DS-Lite (/29)
|| (parts[0] === 192 && parts[1] === 0 && parts[2] === 2) // Assigned as TEST-NET-1, documentation and examples
|| (parts[0] === 192 && parts[1] === 88 && parts[2] === 99) // Reserved. Formerly used for IPv6 to IPv4 relay (included IPv6 address block 2002::/16).
|| (parts[0] === 192 && parts[1] === 168) // Used for local communications within a private network
|| (parts[0] === 192 && parts[1] >= 18 && parts[1] < 20) // Used for benchmark testing of inter-network communications between two separate subnets
|| (parts[0] === 198 && parts[1] === 51 && parts[2] === 100) // Assigned as TEST-NET-2, documentation and examples
|| (parts[0] === 203 && parts[1] === 0 && parts[2] === 113) // Assigned as TEST-NET-3, documentation and examples
|| (parts[0] >= 224 && parts[0] < 240) // In use for multicast (former Class D network)
|| (parts[0] === 233 && parts[1] === 252 && parts[2] === 0) // Assigned as MCAST-TEST-NET, documentation and examples (Note that this is part of the above multicast space.)
|| parts[0] >= 240 // Reserved for future use (former class E network)
|| (parts[0] === 255 && parts[1] === 255 && parts[2] === 255 && parts[3] === 255) // Reserved for the "limited broadcast" destination address
const parts = address.split(".").map((x) => parseInt(x, 10));
return (
parts[0] === 0 || // Current (local, "this") network
parts[0] === 10 || // Used for local communications within a private network
(parts[0] === 100 && parts[1] >= 64 && parts[1] < 128) || // Shared address space for communications between a service provider and its subscribers when using a carrier-grade NAT
parts[0] === 127 || // Used for loopback addresses to the local host
(parts[0] === 169 && parts[1] === 254) || // Used for link-local addresses between two hosts on a single link when no IP address is otherwise specified, such as would have normally been retrieved from a DHCP server
(parts[0] === 127 && parts[1] >= 16 && parts[2] < 32) || // Used for local communications within a private network
(parts[0] === 192 && parts[1] === 0 && parts[2] === 0) || // IETF Porotocol Assignments, DS-Lite (/29)
(parts[0] === 192 && parts[1] === 0 && parts[2] === 2) || // Assigned as TEST-NET-1, documentation and examples
(parts[0] === 192 && parts[1] === 88 && parts[2] === 99) || // Reserved. Formerly used for IPv6 to IPv4 relay (included IPv6 address block 2002::/16).
(parts[0] === 192 && parts[1] === 168) || // Used for local communications within a private network
(parts[0] === 192 && parts[1] >= 18 && parts[1] < 20) || // Used for benchmark testing of inter-network communications between two separate subnets
(parts[0] === 198 && parts[1] === 51 && parts[2] === 100) || // Assigned as TEST-NET-2, documentation and examples
(parts[0] === 203 && parts[1] === 0 && parts[2] === 113) || // Assigned as TEST-NET-3, documentation and examples
(parts[0] >= 224 && parts[0] < 240) || // In use for multicast (former Class D network)
(parts[0] === 233 && parts[1] === 252 && parts[2] === 0) || // Assigned as MCAST-TEST-NET, documentation and examples (Note that this is part of the above multicast space.)
parts[0] >= 240 || // Reserved for future use (former class E network)
(parts[0] === 255 &&
parts[1] === 255 &&
parts[2] === 255 &&
parts[3] === 255)
); // Reserved for the "limited broadcast" destination address
}
function isIPv6Private(ipv6) {
return new Address6(ipv6).getScope() !== "Global";
}
export function makeSecureDispatcher(url: string, options?: undici.Agent.Options) {
export function makeSecureDispatcher(
url: string,
options?: undici.Agent.Options,
) {
const agent = new undici.Agent({
connect: {
rejectUnauthorized: false, // bypass SSL failures -- this is fine
@ -46,12 +54,18 @@ export function makeSecureDispatcher(url: string, options?: undici.Agent.Options
agent.on("connect", (_, targets) => {
const client: undici.Client = targets.slice(-1)[0] as undici.Client;
const socketSymbol = Object.getOwnPropertySymbols(client).find(x => x.description === "socket")!;
const socketSymbol = Object.getOwnPropertySymbols(client).find(
(x) => x.description === "socket",
)!;
const socket: Socket | TLSSocket = (client as any)[socketSymbol];
if (socket.remoteAddress) {
if (socket.remoteFamily === "IPv4" ? isIPv4Private(socket.remoteAddress!) : isIPv6Private(socket.remoteAddress!)) {
socket.destroy(new InsecureConnectionError())
if (
socket.remoteFamily === "IPv4"
? isIPv4Private(socket.remoteAddress!)
: isIPv6Private(socket.remoteAddress!)
) {
socket.destroy(new InsecureConnectionError());
}
}
});
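
The dispatcher above destroys any socket whose remote address falls in private IPv4 or IPv6 space, acting as an SSRF guard for user-supplied URLs. A sketch of how it is wired into a fetch call, mirroring the fetchFileToBuffer and downloadFile call sites in this commit:

// Sketch only; the import path matches the one used elsewhere in this commit.
import * as undici from "undici";
import { InsecureConnectionError, makeSecureDispatcher } from "./safeFetch";

async function guardedFetch(url: string): Promise<undici.Response> {
  try {
    return await undici.fetch(url, {
      redirect: "follow",
      dispatcher: await makeSecureDispatcher(url),
    });
  } catch (error) {
    // The connect hook destroys the socket with InsecureConnectionError before
    // any request data reaches a private address.
    if (
      error instanceof TypeError &&
      error.cause instanceof InsecureConnectionError
    ) {
      throw error.cause;
    }
    throw error;
  }
}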


@ -420,7 +420,9 @@ export async function scrapeURL(
} else if (error instanceof ActionError) {
meta.logger.warn("scrapeURL: Action(s) failed to complete", { error });
} else if (error instanceof UnsupportedFileError) {
meta.logger.warn("scrapeURL: Tried to scrape unsupported file", { error });
meta.logger.warn("scrapeURL: Tried to scrape unsupported file", {
error,
});
} else {
Sentry.captureException(error);
meta.logger.error("scrapeURL: Unexpected error happened", { error });


@ -44,10 +44,15 @@ export function extractMetadata(
title = soup("title").first().text().trim() || undefined;
description = soup('meta[name="description"]').attr("content") || undefined;
const faviconLink = soup('link[rel="icon"]').attr("href") || soup('link[rel*="icon"]').first().attr("href") || undefined;
const faviconLink =
soup('link[rel="icon"]').attr("href") ||
soup('link[rel*="icon"]').first().attr("href") ||
undefined;
if (faviconLink) {
const baseUrl = new URL(meta.url).origin;
favicon = faviconLink.startsWith('http') ? faviconLink : `${baseUrl}${faviconLink}`;
favicon = faviconLink.startsWith("http")
? faviconLink
: `${baseUrl}${faviconLink}`;
}
// Assuming the language is part of the URL as per the regex pattern


@ -24,7 +24,6 @@ export function saveToCache(meta: Meta, document: Document): Document {
return document;
}
const key = cacheKey(meta.url, meta.options, meta.internalOptions);
if (key !== null) {


@ -8,13 +8,21 @@ describe("removeDefaultProperty", () => {
});
it("should remove the default property from a nested object", () => {
const input = { default: "test", nested: { default: "nestedTest", test: "nestedTest" } };
const input = {
default: "test",
nested: { default: "nestedTest", test: "nestedTest" },
};
const expectedOutput = { nested: { test: "nestedTest" } };
expect(removeDefaultProperty(input)).toEqual(expectedOutput);
});
it("should remove the default property from an array of objects", () => {
const input = { array: [{ default: "test1", test: "test1" }, { default: "test2", test: "test2" }] };
const input = {
array: [
{ default: "test1", test: "test1" },
{ default: "test2", test: "test2" },
],
};
const expectedOutput = { array: [{ test: "test1" }, { test: "test2" }] };
expect(removeDefaultProperty(input)).toEqual(expectedOutput);
});


@ -123,7 +123,7 @@ export async function generateOpenAICompletions(
let schema = options.schema;
if (schema) {
schema = removeDefaultProperty(schema);
}
}
if (schema && schema.type === "array") {
schema = {
@ -140,10 +140,10 @@ export async function generateOpenAICompletions(
properties: Object.fromEntries(
Object.entries(schema).map(([key, value]) => {
return [key, removeDefaultProperty(value)];
})
}),
),
required: Object.keys(schema),
additionalProperties: false
additionalProperties: false,
};
}
@ -240,14 +240,14 @@ export async function performLLMExtract(
}
export function removeDefaultProperty(schema: any): any {
if (typeof schema !== 'object' || schema === null) return schema;
if (typeof schema !== "object" || schema === null) return schema;
const { default: _, ...rest } = schema;
for (const key in rest) {
if (Array.isArray(rest[key])) {
rest[key] = rest[key].map((item: any) => removeDefaultProperty(item));
} else if (typeof rest[key] === 'object' && rest[key] !== null) {
} else if (typeof rest[key] === "object" && rest[key] !== null) {
rest[key] = removeDefaultProperty(rest[key]);
}
}


@ -9,9 +9,11 @@ configDotenv();
function cleanOfNull<T>(x: T): T {
if (Array.isArray(x)) {
return x.map(x => cleanOfNull(x)) as T;
return x.map((x) => cleanOfNull(x)) as T;
} else if (typeof x === "object" && x !== null) {
return Object.fromEntries(Object.entries(x).map(([k,v]) => [k,cleanOfNull(v)])) as T
return Object.fromEntries(
Object.entries(x).map(([k, v]) => [k, cleanOfNull(v)]),
) as T;
} else if (typeof x === "string") {
return x.replaceAll("\u0000", "") as T;
} else {


@ -16,9 +16,7 @@ export const loggingQueueName = "{loggingQueue}";
export function getScrapeQueue() {
if (!scrapeQueue) {
scrapeQueue = new Queue(
scrapeQueueName,
{
scrapeQueue = new Queue(scrapeQueueName, {
connection: redisConnection,
defaultJobOptions: {
removeOnComplete: {
@ -28,8 +26,7 @@ export function getScrapeQueue() {
age: 90000, // 25 hours
},
},
}
);
});
logger.info("Web scraper queue created");
}
return scrapeQueue;
@ -37,9 +34,7 @@ export function getScrapeQueue() {
export function getExtractQueue() {
if (!extractQueue) {
extractQueue = new Queue(
extractQueueName,
{
extractQueue = new Queue(extractQueueName, {
connection: redisConnection,
defaultJobOptions: {
removeOnComplete: {
@ -49,14 +44,12 @@ export function getExtractQueue() {
age: 90000, // 25 hours
},
},
}
);
});
logger.info("Extraction queue created");
}
return extractQueue;
}
// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
// import { QueueEvents } from 'bullmq';
// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });


@ -89,13 +89,19 @@ const runningJobs: Set<string> = new Set();
async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
if (await finishCrawl(job.data.crawl_id)) {
(async () => {
const originUrl = sc.originUrl ? normalizeUrlOnlyHostname(sc.originUrl) : undefined;
const originUrl = sc.originUrl
? normalizeUrlOnlyHostname(sc.originUrl)
: undefined;
// Get all visited URLs from Redis
const visitedUrls = await redisConnection.smembers(
"crawl:" + job.data.crawl_id + ":visited",
);
// Upload to Supabase if we have URLs and this is a crawl (not a batch scrape)
if (visitedUrls.length > 0 && job.data.crawlerOptions !== null && originUrl) {
if (
visitedUrls.length > 0 &&
job.data.crawlerOptions !== null &&
originUrl
) {
// Fire and forget the upload to Supabase
try {
// Standardize URLs to canonical form (https, no www)
@ -317,7 +323,10 @@ const processJobInternal = async (token: string, job: Job & { id: string }) => {
return err;
};
const processExtractJobInternal = async (token: string, job: Job & { id: string }) => {
const processExtractJobInternal = async (
token: string,
job: Job & { id: string },
) => {
const logger = _logger.child({
module: "extract-worker",
method: "processJobInternal",
@ -360,11 +369,14 @@ const processExtractJobInternal = async (token: string, job: Job & { id: string
await updateExtract(job.data.extractId, {
status: "failed",
error: error.error ?? error ?? "Unknown error, please contact help@firecrawl.dev. Extract id: " + job.data.extractId,
error:
error.error ??
error ??
"Unknown error, please contact help@firecrawl.dev. Extract id: " +
job.data.extractId,
});
// throw error;
} finally {
clearInterval(extendLockInterval);
}
};
@ -635,7 +647,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
sc,
jobs.map((x) => ({ id: x.opts.jobId, url: x.data.url })),
);
const lockedJobs = jobs.filter(x => lockedIds.find(y => y.id === x.opts.jobId));
const lockedJobs = jobs.filter((x) =>
lockedIds.find((y) => y.id === x.opts.jobId),
);
logger.debug("Adding scrape jobs to Redis...");
await addCrawlJobs(
job.data.crawl_id,
@ -790,7 +804,8 @@ async function processJob(job: Job & { id: string }, token: string) {
) {
const crawler = crawlToCrawler(job.data.crawl_id, sc);
if (
crawler.filterURL(doc.metadata.url, doc.metadata.sourceURL) === null &&
crawler.filterURL(doc.metadata.url, doc.metadata.sourceURL) ===
null &&
!job.data.isCrawlSourceScrape
) {
throw new Error(
@ -1073,7 +1088,7 @@ async function processJob(job: Job & { id: string }, token: string) {
console.log("All workers exited. Waiting for all jobs to finish...");
while (runningJobs.size > 0) {
await new Promise(resolve => setTimeout(resolve, 500));
await new Promise((resolve) => setTimeout(resolve, 500));
}
process.exit(0);