Merge branch 'main' into mog/index

Gergő Móricz 2025-06-03 16:09:57 +02:00 committed by GitHub
commit d7fef33224
31 changed files with 458 additions and 429 deletions

View File

@ -128,7 +128,7 @@ jobs:
- name: Kill SearXNG
if: always() && matrix.search == 'searxng'
run: |
docker logs searxng > searxng/searxng.log 2>&1
docker logs searxng > searxng.log 2>&1
docker kill searxng
working-directory: ./
- uses: actions/upload-artifact@v4
@ -149,5 +149,4 @@ jobs:
with:
name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }}, ${{ matrix.proxy }})
path: |
./searxng/searxng.log
./searxng/settings.yml
./searxng.log

View File

@ -69,7 +69,6 @@
"@google-cloud/storage": "^7.16.0",
"@nangohq/node": "^0.40.8",
"@openrouter/ai-sdk-provider": "^0.4.5",
"@pinecone-database/pinecone": "^4.0.0",
"@sentry/cli": "^2.33.1",
"@sentry/node": "^8.26.0",
"@sentry/profiling-node": "^8.26.0",

View File

@ -59,9 +59,6 @@ importers:
'@openrouter/ai-sdk-provider':
specifier: ^0.4.5
version: 0.4.5(zod@3.24.2)
'@pinecone-database/pinecone':
specifier: ^4.0.0
version: 4.0.0
'@sentry/cli':
specifier: ^2.33.1
version: 2.33.1(encoding@0.1.13)
@ -1232,10 +1229,6 @@ packages:
'@pdf-lib/upng@1.0.1':
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
'@pinecone-database/pinecone@4.0.0':
resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==}
engines: {node: '>=18.0.0'}
'@pkgjs/parseargs@0.11.0':
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
engines: {node: '>=14'}
@ -6331,10 +6324,6 @@ snapshots:
dependencies:
pako: 1.0.11
'@pinecone-database/pinecone@4.0.0':
dependencies:
encoding: 0.1.13
'@pkgjs/parseargs@0.11.0':
optional: true
@ -7921,6 +7910,7 @@ snapshots:
encoding@0.1.13:
dependencies:
iconv-lite: 0.6.3
optional: true
end-of-stream@1.4.4:
dependencies:

View File

@ -78,24 +78,24 @@ describe("Scrape tests", () => {
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
}, 60000);
// describe("Ad blocking (f-e dependant)", () => {
// it.concurrent("blocks ads by default", async () => {
// const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/",
// });
describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
});
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
// }, 30000);
expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 30000);
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
// const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/",
// blockAds: false,
// });
it.concurrent("doesn't block ads if explicitly disabled", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
blockAds: false,
});
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
// }, 30000);
// });
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
}, 30000);
});
describe("Index", () => {
it.concurrent("caches properly", async () => {

View File

@ -82,6 +82,7 @@ export async function scrapeHelper(
internalOptions,
origin: req.body.origin ?? defaultOrigin,
is_scrape: true,
startTime: Date.now(),
},
{},
jobId,

View File

@ -112,6 +112,7 @@ export async function searchHelper(
team_id: team_id,
scrapeOptions,
internalOptions,
startTime: Date.now(),
},
opts: {
jobId: uuid,

View File

@ -13,6 +13,7 @@ import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { getJobPriority } from "../../lib/job-priority";
import { getScrapeQueue } from "../../services/queue-service";
import { supabaseGetJobById } from "../../lib/supabase-jobs";
import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";
export async function scrapeController(
req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
@ -30,7 +31,6 @@ export async function scrapeController(
});
req.body = scrapeRequestSchema.parse(req.body);
let earlyReturn = false;
const origin = req.body.origin;
const timeout = req.body.timeout;
@ -42,6 +42,8 @@ export async function scrapeController(
});
//
const isDirectToBullMQ = process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
await addScrapeJob(
{
url: req.body.url,
@ -52,13 +54,16 @@ export async function scrapeController(
teamId: req.auth.team_id,
saveScrapeResultToGCS: process.env.GCS_FIRE_ENGINE_BUCKET_NAME ? true : false,
unnormalizedSourceURL: preNormalizedBody.url,
useCache: req.body.__experimental_cache ? true : false,
bypassBilling: isDirectToBullMQ,
},
origin: req.body.origin,
is_scrape: true,
startTime,
},
{},
jobId,
jobPriority,
isDirectToBullMQ,
);
const totalWait =
@ -124,57 +129,13 @@ export async function scrapeController(
await getScrapeQueue().remove(jobId);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens =
doc && doc.extract
? // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
0 // TODO: fix
: 0;
let creditsToBeBilled = 1; // Assuming 1 credit per document
if (earlyReturn) {
// Don't bill if we're early returning
return;
}
if ((req.body.extract && req.body.formats?.includes("extract")) || (req.body.formats?.includes("changeTracking") && req.body.changeTrackingOptions?.modes?.includes("json"))) {
creditsToBeBilled = 5;
}
if (req.body.agent?.model?.toLowerCase() === "fire-1" || req.body.extract?.agent?.model?.toLowerCase() === "fire-1" || req.body.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
if (process.env.USE_DB_AUTHENTICATION === "true") {
// @Nick this is a hack pushed at 2AM pls help - mogery
const job = await supabaseGetJobById(jobId);
if (!job?.cost_tracking) {
logger.warn("No cost tracking found for job", {
jobId,
});
}
creditsToBeBilled = Math.ceil((job?.cost_tracking?.totalCost ?? 1) * 1800);
} else {
creditsToBeBilled = 150;
}
}
if (doc?.metadata?.proxyUsed === "stealth") {
creditsToBeBilled += 4;
}
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
(error) => {
logger.error(
`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`,
);
// Optionally, you could notify an admin or add to a retry queue here
},
);
if (!req.body.formats.includes("rawHtml")) {
if (doc && doc.rawHtml) {
delete doc.rawHtml;
}
}
return res.status(200).json({
success: true,
data: doc,

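Note: the inline credit logic removed above now lives in the shared calculateCreditsToBeBilled helper imported at the top of this file. A minimal sketch of the controller-side billing after the refactor; the exact replacement call sits outside this hunk, so the argument names here are assumptions based on the helper's signature:

    const creditsToBeBilled = await calculateCreditsToBeBilled(
      req.body, // parsed scrape options (extract/formats/agent are read off this object)
      doc,      // the scraped Document
      jobId,
    );
    billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch((error) => {
      logger.error(
        `Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`,
      );
    });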
View File

@ -40,24 +40,24 @@ export async function searchAndScrapeSearchResult(
try {
const searchResults = await search({
query,
num_results: 5
});
num_results: 5,
});
const documents = await Promise.all(
searchResults.map(result =>
scrapeSearchResult(
{
url: result.url,
title: result.title,
description: result.description
},
options,
logger,
costTracking,
flags
)
)
);
const documents = await Promise.all(
searchResults.map((result) =>
scrapeSearchResult(
{
url: result.url,
title: result.title,
description: result.description,
},
options,
logger,
costTracking,
flags,
),
),
);
return documents;
} catch (error) {
@ -76,6 +76,8 @@ async function scrapeSearchResult(
logger: Logger,
costTracking: CostTracking,
flags: TeamFlags,
directToBullMQ: boolean = false,
isSearchPreview: boolean = false,
): Promise<Document> {
const jobId = uuidv4();
const jobPriority = await getJobPriority({
@ -99,18 +101,19 @@ async function scrapeSearchResult(
mode: "single_urls" as Mode,
team_id: options.teamId,
scrapeOptions: options.scrapeOptions,
internalOptions: { teamId: options.teamId, useCache: true },
internalOptions: { teamId: options.teamId, useCache: true, bypassBilling: true },
origin: options.origin,
is_scrape: true,
startTime: Date.now(),
},
{},
jobId,
jobPriority,
directToBullMQ,
);
const doc: Document = await waitForJob(jobId, options.timeout);
logger.info("Scrape job completed", {
scrapeId: jobId,
url: searchResult.url,
@ -169,6 +172,8 @@ export async function searchController(
};
const startTime = new Date().getTime();
const costTracking = new CostTracking();
const isSearchPreview =
process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
try {
req.body = searchRequestSchema.parse(req.body);
@ -197,7 +202,9 @@ export async function searchController(
});
if (req.body.ignoreInvalidURLs) {
searchResults = searchResults.filter((result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null));
searchResults = searchResults.filter(
(result) => !isUrlBlocked(result.url, req.acuc?.flags ?? null),
);
}
logger.info("Searching completed", {
@ -224,12 +231,20 @@ export async function searchController(
} else {
logger.info("Scraping search results");
const scrapePromises = searchResults.map((result) =>
scrapeSearchResult(result, {
teamId: req.auth.team_id,
origin: req.body.origin,
timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions,
}, logger, costTracking, req.acuc?.flags ?? null),
scrapeSearchResult(
result,
{
teamId: req.auth.team_id,
origin: req.body.origin,
timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions,
},
logger,
costTracking,
req.acuc?.flags ?? null,
(req.acuc?.price_credits ?? 0) <= 3000,
isSearchPreview,
),
);
const docs = await Promise.all(scrapePromises);
@ -255,11 +270,23 @@ export async function searchController(
}
// Bill team once for all successful results
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.length).catch((error) => {
logger.error(
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
);
});
if (!isSearchPreview) {
billTeam(
req.auth.team_id,
req.acuc?.sub_id,
responseData.data.reduce((a, x) => {
if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
return a + x.metadata.numPages;
} else {
return a + 1;
}
}, 0),
).catch((error) => {
logger.error(
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
);
});
}
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
@ -269,22 +296,25 @@ export async function searchController(
time_taken: timeTakenInSeconds,
});
logJob({
job_id: jobId,
success: true,
num_docs: responseData.data.length,
docs: responseData.data,
time_taken: timeTakenInSeconds,
team_id: req.auth.team_id,
mode: "search",
url: req.body.query,
scrapeOptions: req.body.scrapeOptions,
origin: req.body.origin,
cost_tracking: costTracking,
});
logJob(
{
job_id: jobId,
success: true,
num_docs: responseData.data.length,
docs: responseData.data,
time_taken: timeTakenInSeconds,
team_id: req.auth.team_id,
mode: "search",
url: req.body.query,
scrapeOptions: req.body.scrapeOptions,
origin: req.body.origin,
cost_tracking: costTracking,
},
false,
isSearchPreview,
);
return res.status(200).json(responseData);
} catch (error) {
if (
error instanceof Error &&

View File

@ -311,6 +311,8 @@ const baseScrapeOptions = z
proxy: z.enum(["basic", "stealth", "auto"]).optional(),
maxAge: z.number().int().gte(0).safe().default(0),
storeInCache: z.boolean().default(true),
__experimental_cache: z.boolean().default(false).optional(),
__searchPreviewToken: z.string().optional(),
})
.strict(strictMessage);
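Both new fields are internal knobs rather than public API: __experimental_cache is read by deep-research (below) to opt scrapes into the experimental cache, and __searchPreviewToken is compared against the SEARCH_PREVIEW_TOKEN environment variable in the scrape and search controllers. A minimal sketch of that gating, assuming the parse step shown in the controllers (the explicit undefined guard is an addition here, not in the source):

    const body = scrapeRequestSchema.parse(rawBody);
    const isDirectToBullMQ =
      process.env.SEARCH_PREVIEW_TOKEN !== undefined &&
      process.env.SEARCH_PREVIEW_TOKEN === body.__searchPreviewToken;
    // Preview-token requests skip the concurrency queue, billing, and DB logging.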
@ -1172,6 +1174,7 @@ export const searchRequestSchema = z
origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000),
ignoreInvalidURLs: z.boolean().optional().default(false),
__searchPreviewToken: z.string().optional(),
scrapeOptions: baseScrapeOptions
.extend({
formats: z

View File

@ -95,8 +95,13 @@ function startServer(port = DEFAULT_PORT) {
logger.info(`Worker ${process.pid} listening on port ${port}`);
});
const exitHandler = () => {
const exitHandler = async () => {
logger.info("SIGTERM signal received: closing HTTP server");
if (process.env.IS_KUBERNETES === "true") {
// Account for GCE load balancer drain timeout
logger.info("Waiting 60s for GCE load balancer drain timeout");
await new Promise((resolve) => setTimeout(resolve, 60000));
}
server.close(() => {
logger.info("Server closed.");
process.exit(0);

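Making exitHandler async only helps if its caller tolerates a promise; the signal registration itself is outside this hunk. A rough sketch of the assumed wiring, based on the "SIGTERM signal received" log message above:

    // Sketch only: registration is not shown in this diff.
    process.on("SIGTERM", () => {
      // On Kubernetes the handler now sleeps 60s to outlive the GCE load
      // balancer drain window before closing the HTTP server and exiting.
      void exitHandler();
    });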
View File

@ -133,6 +133,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
blockAds: false,
maxAge: 0,
dontStoreInCache: false,
__experimental_cache: true,
},
}, logger, costTracking, acuc?.flags ?? null);
return response.length > 0 ? response : [];

View File

@ -49,6 +49,7 @@ export async function scrapeDocument(
origin: options.origin,
is_scrape: true,
from_extract: true,
startTime: Date.now(),
},
{},
jobId,

View File

@ -47,6 +47,7 @@ export async function scrapeDocument_F0(
origin: options.origin,
is_scrape: true,
from_extract: true,
startTime: Date.now(),
},
{},
jobId,

View File

@ -1,164 +0,0 @@
import { Pinecone } from "@pinecone-database/pinecone";
import { Document } from "../../../controllers/v1/types";
import { logger } from "../../logger";
import { embed } from "ai";
import { getEmbeddingModel } from "../../generic-ai";
const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY!,
});
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "";
const MAX_METADATA_SIZE = 30 * 1024; // 30KB in bytes
export interface PageMetadata {
url: string;
originUrl: string;
title?: string;
description?: string;
crawlId?: string;
teamId?: string;
timestamp: number;
markdown?: string;
}
async function getEmbedding(text: string) {
const { embedding } = await embed({
model: getEmbeddingModel("text-embedding-3-small"),
value: text,
});
return embedding;
}
function normalizeUrl(url: string) {
const urlO = new URL(url);
if (!urlO.hostname.startsWith("www.")) {
urlO.hostname = "www." + urlO.hostname;
}
return urlO.href;
}
export async function indexPage({
document,
originUrl,
crawlId,
teamId,
}: {
document: Document;
originUrl: string;
crawlId?: string;
teamId?: string;
}) {
try {
const index = pinecone.index(INDEX_NAME);
// Trim markdown if it's too long
let trimmedMarkdown = document.markdown;
if (
trimmedMarkdown &&
Buffer.byteLength(trimmedMarkdown, "utf-8") > MAX_METADATA_SIZE
) {
trimmedMarkdown = trimmedMarkdown.slice(
0,
Math.floor(MAX_METADATA_SIZE / 2),
); // Using half the size to be safe with UTF-8 encoding
}
// Create text to embed
const textToEmbed = [
document.metadata.title,
document.metadata.description,
trimmedMarkdown,
]
.filter(Boolean)
.join("\n\n");
// Get embedding from OpenAI
const embedding = await getEmbedding(textToEmbed);
const normalizedUrl = normalizeUrl(
document.metadata.sourceURL || document.metadata.url!,
);
// Prepare metadata
const metadata: PageMetadata = {
url: normalizedUrl,
originUrl: normalizeUrl(originUrl),
title: document.metadata.title ?? document.metadata.ogTitle ?? "",
description:
document.metadata.description ?? document.metadata.ogDescription ?? "",
crawlId,
teamId,
markdown: trimmedMarkdown,
timestamp: Date.now(),
};
// Upsert to Pinecone
await index.upsert([
{
id: normalizedUrl,
values: embedding,
metadata: {
...metadata,
[document.metadata.sourceURL || document.metadata.url!]: true,
},
},
]);
logger.debug("Successfully indexed page in Pinecone", {
url: metadata.url,
crawlId,
});
} catch (error) {
logger.error("Failed to index page in Pinecone", {
error,
url: document.metadata.sourceURL || document.metadata.url,
crawlId,
});
}
}
export async function searchSimilarPages(
query: string,
originUrl?: string,
limit: number = 1000,
): Promise<any[]> {
try {
const index = pinecone.index(INDEX_NAME);
// Get query embedding from OpenAI
const queryEmbedding = await getEmbedding(query);
const queryParams: any = {
vector: queryEmbedding,
topK: limit,
includeMetadata: true,
};
const normalizedOriginUrl = originUrl ? normalizeUrl(originUrl) : undefined;
// Add filter if originUrl is provided
if (normalizedOriginUrl) {
queryParams.filter = {
originUrl: { $eq: normalizedOriginUrl },
};
}
const results = await index.query(queryParams);
return results.matches.map((match) => ({
url: match.metadata?.url,
title: match.metadata?.title,
description: match.metadata?.description,
score: match.score,
markdown: match.metadata?.markdown,
}));
} catch (error) {
logger.error("Failed to search similar pages in Pinecone", {
error,
query,
originUrl,
});
return [];
}
}

View File

@ -4,7 +4,6 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { logger } from "../logger";
import { CohereClient } from "cohere-ai";
import { extractConfig } from "./config";
import { searchSimilarPages } from "./index/pinecone";
import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
import { buildRerankerUserPrompt } from "./build-prompts";
import { buildRerankerSystemPrompt } from "./build-prompts";

View File

@ -0,0 +1,49 @@
import { Document, ScrapeOptions } from "../controllers/v1/types";
import { supabaseGetJobById } from "./supabase-jobs";
import { logger } from "./logger";
import { CostTracking } from "./extract/extraction-service";
const creditsPerPDFPage = 1;
const stealthProxyCostBonus = 4;
export async function calculateCreditsToBeBilled(options: ScrapeOptions, document: Document, jobId: string, costTracking?: any) {
let creditsToBeBilled = 1; // Assuming 1 credit per document
if ((options.extract && options.formats?.includes("extract")) || (options.formats?.includes("changeTracking") && options.changeTrackingOptions?.modes?.includes("json"))) {
creditsToBeBilled = 5;
}
if (options.agent?.model?.toLowerCase() === "fire-1" || options.extract?.agent?.model?.toLowerCase() === "fire-1" || options.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
if (process.env.USE_DB_AUTHENTICATION === "true") {
// @Nick this is a hack pushed at 2AM pls help - mogery
if (!costTracking) {
const job = await supabaseGetJobById(jobId);
costTracking = job?.cost_tracking;
}
if (!costTracking) {
logger.warn("No cost tracking found for job", {
jobId,
scrapeId: jobId
});
}
if (costTracking instanceof CostTracking) {
costTracking = costTracking.toJSON();
}
creditsToBeBilled = Math.ceil((costTracking?.totalCost ?? 1) * 1800);
} else {
creditsToBeBilled = 150;
}
}
if (document.metadata.numPages !== undefined && document.metadata.numPages > 1) {
creditsToBeBilled += creditsPerPDFPage * (document.metadata.numPages - 1);
}
if (document?.metadata?.proxyUsed === "stealth") {
creditsToBeBilled += stealthProxyCostBonus;
}
return creditsToBeBilled;
}
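For reference, a minimal sketch of calling this helper; the real call sites added in this commit are billScrapeJob in queue-worker.ts and the v1 scrape controller:

    const credits = await calculateCreditsToBeBilled(
      job.data.scrapeOptions, // ScrapeOptions
      doc,                    // Document produced by the scrape
      job.id,                 // used to look up cost_tracking if none is passed
      costTracking,           // optional CostTracking instance or plain JSON
    );
    // The commit then either enqueues a "bill_team" job (worker path) or calls
    // billTeam directly (controller path) with this amount.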

View File

@ -15,7 +15,6 @@ import {
ScrapeUrlResponse,
} from "../scraper/scrapeURL";
import { Engine } from "../scraper/scrapeURL/engines";
import { indexPage } from "../lib/extract/index/pinecone";
import { CostTracking } from "../lib/extract/extraction-service";
configDotenv();

View File

@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
mobile: meta.options.mobile,
timeout, // TODO: better timeout logic
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
blockAds: meta.options.blockAds,
mobileProxy: meta.featureFlags.has("stealthProxy"),
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
// TODO: scrollXPaths

View File

@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = {
headers?: { [K: string]: string };
blockMedia?: boolean; // default: true
blockAds?: boolean; // default: true
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side
// useProxy?: boolean; // unused, default: true
@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = {
blockMedia?: true; // cannot be false
mobile?: boolean;
disableSmartWaitCache?: boolean;
blockAds?: boolean; // default: true
saveScrapeResultToGCS?: boolean;
};
@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = {
engine: "tlsclient";
atsv?: boolean; // v0 only, default: false
disableJsDom?: boolean; // v0 only, default: false
// blockAds?: boolean; // default: true
};
const schema = z.object({

View File

@ -74,6 +74,7 @@ export const featureFlags = [
"skipTlsVerification",
"useFastMode",
"stealthProxy",
"disableAdblock",
] as const;
export type FeatureFlag = (typeof featureFlags)[number];
@ -95,6 +96,7 @@ export const featureFlagOptions: {
mobile: { priority: 10 },
skipTlsVerification: { priority: 10 },
stealthProxy: { priority: 20 },
disableAdblock: { priority: 10 },
} as const;
export type EngineScrapeResult = {
@ -171,6 +173,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 1000, // cache should always be tried first
},
@ -205,6 +208,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 50,
},
@ -222,6 +226,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 45,
},
@ -256,6 +261,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: true,
disableAdblock: false,
},
quality: -2,
},
@ -273,6 +279,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: true,
disableAdblock: false,
},
quality: -5,
},
@ -290,6 +297,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: true,
},
quality: 40,
},
@ -307,6 +315,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: true,
disableAdblock: true,
},
quality: -10,
},
@ -324,6 +333,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 20,
},
@ -341,6 +351,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: false,
disableAdblock: false,
},
quality: 10,
},
@ -358,6 +369,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true,
disableAdblock: false,
},
quality: -15,
},
@ -375,6 +387,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: false,
disableAdblock: false,
},
quality: 5,
},
@ -392,6 +405,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true, // kinda...
disableAdblock: true,
},
quality: -20,
},
@ -409,6 +423,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true, // kinda...
disableAdblock: true,
},
quality: -20,
},

View File

@ -120,6 +120,10 @@ function buildFeatureFlags(
flags.add("docx");
}
if (options.blockAds === false) {
flags.add("disableAdblock");
}
return flags;
}
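disableAdblock follows the established feature-flag pattern: buildFeatureFlags adds it only when blockAds is explicitly false, and every entry in engineOptions now declares whether it can honor it, so flag-incompatible engines drop out of selection. A minimal sketch of that eligibility check; engineSupports and the surrounding names are hypothetical stand-ins for the real selection logic:

    // Sketch: an engine qualifies only if it supports every requested flag.
    const eligible = candidateEngines.filter((engine) =>
      [...requestedFlags].every((flag) => engineSupports(engine, flag)),
    );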
@ -187,6 +191,7 @@ export type InternalOptions = {
unnormalizedSourceURL?: string;
saveScrapeResultToGCS?: boolean; // Passed along to fire-engine
bypassBilling?: boolean;
};
export type EngineResultsTracker = {

View File

@ -5,6 +5,58 @@ import { logger } from "../lib/logger";
dotenv.config();
export async function fire_engine_search(
q: string,
options: {
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
numResults: number;
page?: number;
},
abort?: AbortSignal,
): Promise<SearchResult[]> {
try {
let data = JSON.stringify({
query: q,
lang: options.lang,
country: options.country,
location: options.location,
tbs: options.tbs,
numResults: options.numResults,
page: options.page ?? 1,
});
if (!process.env.FIRE_ENGINE_BETA_URL) {
return [];
}
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-Disable-Cache": "true",
},
body: data,
signal: abort,
});
if (response.ok) {
const responseData = await response.json();
return responseData;
} else {
return [];
}
} catch (error) {
logger.error(error);
Sentry.captureException(error);
return [];
}
}
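A minimal usage sketch for the new fire_engine_search helper, assuming FIRE_ENGINE_BETA_URL is configured; the optional abort parameter lets callers bound the request:

    const results = await fire_engine_search(
      "firecrawl web scraping",
      { numResults: 5, lang: "en", country: "us" },
      AbortSignal.timeout(10_000), // assumption: caller-chosen 10s budget
    );
    // Returns [] when FIRE_ENGINE_BETA_URL is unset, the response is not ok,
    // or the request throws.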
export async function fireEngineMap(
q: string,
options: {
@ -34,7 +86,7 @@ export async function fireEngineMap(
return [];
}
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, {
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/map`, {
method: "POST",
headers: {
"Content-Type": "application/json",

View File

@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
import { searchapi_search } from "./searchapi";
import { serper_search } from "./serper";
import { searxng_search } from "./searxng";
import { fire_engine_search } from "./fireEngine";
export async function search({
query,
@ -31,8 +32,19 @@ export async function search({
timeout?: number;
}): Promise<SearchResult[]> {
try {
if (process.env.FIRE_ENGINE_BETA_URL) {
const results = await fire_engine_search(query, {
numResults: num_results,
tbs,
filter,
lang,
country,
location,
});
if (results.length > 0) return results;
}
if (process.env.SERPER_API_KEY) {
return await serper_search(query, {
const results = await serper_search(query, {
num_results,
tbs,
filter,
@ -40,9 +52,10 @@ export async function search({
country,
location,
});
if (results.length > 0) return results;
}
if (process.env.SEARCHAPI_API_KEY) {
return await searchapi_search(query, {
const results = await searchapi_search(query, {
num_results,
tbs,
filter,
@ -50,9 +63,10 @@ export async function search({
country,
location,
});
if (results.length > 0) return results;
}
if (process.env.SEARXNG_ENDPOINT) {
return await searxng_search(query, {
const results = await searxng_search(query, {
num_results,
tbs,
filter,
@ -60,6 +74,7 @@ export async function search({
country,
location,
});
if (results.length > 0) return results;
}
return await googleSearch(
query,

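The behavioral change in this file: each configured provider is tried in order and only short-circuits when it returns at least one result, instead of returning whatever the first configured provider gave back. A condensed sketch of the resulting chain, with the per-provider option shapes simplified:

    // fire-engine -> serper -> searchapi -> searxng -> google,
    // advancing whenever a provider yields zero results.
    const providers: Array<() => Promise<SearchResult[]>> = [];
    if (process.env.FIRE_ENGINE_BETA_URL)
      providers.push(() => fire_engine_search(query, { numResults: num_results, tbs, filter, lang, country, location }));
    if (process.env.SERPER_API_KEY)
      providers.push(() => serper_search(query, { num_results, tbs, filter, lang, country, location }));
    if (process.env.SEARCHAPI_API_KEY)
      providers.push(() => searchapi_search(query, { num_results, tbs, filter, lang, country, location }));
    if (process.env.SEARXNG_ENDPOINT)
      providers.push(() => searxng_search(query, { num_results, tbs, filter, lang, country, location }));
    for (const run of providers) {
      const results = await run();
      if (results.length > 0) return results;
    }
    return await googleSearch(query /* , ...remaining options as before */);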
View File

@ -21,12 +21,13 @@ function cleanOfNull<T>(x: T): T {
}
}
export async function logJob(job: FirecrawlJob, force: boolean = false) {
export async function logJob(job: FirecrawlJob, force: boolean = false, bypassLogging: boolean = false) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
if (!useDbAuthentication) {
return;
}
// Redact any pages that have an authorization header
// actually, Don't. we use the db to retrieve results now. this breaks authed crawls - mogery
@ -63,12 +64,17 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
is_migrated: true,
cost_tracking: job.cost_tracking,
pdf_num_pages: job.pdf_num_pages ?? null,
credits_billed: job.credits_billed ?? null,
};
if (process.env.GCS_BUCKET_NAME) {
await saveJobToGCS(job);
}
if (bypassLogging) {
return;
}
if (force) {
let i = 0,
done = false;

View File

@ -97,6 +97,7 @@ async function addScrapeJobRaw(
options: any,
jobId: string,
jobPriority: number,
directToBullMQ: boolean = false,
) {
const hasCrawlDelay = webScraperOptions.crawl_id && webScraperOptions.crawlerOptions?.delay;
@ -127,7 +128,7 @@ async function addScrapeJobRaw(
const concurrencyQueueJobs = await getConcurrencyQueueJobsCount(webScraperOptions.team_id);
if (concurrencyLimited) {
if (concurrencyLimited && !directToBullMQ) {
// Detect if they hit their concurrent limit
// If above by 2x, send them an email
// No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
@ -161,6 +162,7 @@ export async function addScrapeJob(
options: any = {},
jobId: string = uuidv4(),
jobPriority: number = 10,
directToBullMQ: boolean = false,
) {
if (Sentry.isInitialized()) {
const size = JSON.stringify(webScraperOptions).length;
@ -187,11 +189,12 @@ export async function addScrapeJob(
options,
jobId,
jobPriority,
directToBullMQ,
);
},
);
} else {
await addScrapeJobRaw(webScraperOptions, options, jobId, jobPriority);
await addScrapeJobRaw(webScraperOptions, options, jobId, jobPriority, directToBullMQ);
}
}
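directToBullMQ defaults to false and threads through to addScrapeJobRaw, where the `concurrencyLimited && !directToBullMQ` check means such jobs skip the team concurrency queue and go straight to BullMQ. A minimal caller sketch, mirroring how the v1 scrape controller passes isDirectToBullMQ earlier in this commit:

    await addScrapeJob(
      webScraperOptions,
      {},          // BullMQ job options
      jobId,
      jobPriority, // default 10
      true,        // directToBullMQ: bypass the concurrency queue
    );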

View File

@ -66,10 +66,10 @@ export function getIndexQueue() {
connection: redisConnection,
defaultJobOptions: {
removeOnComplete: {
age: 90000, // 25 hours
age: 3600, // 1 hour
},
removeOnFail: {
age: 90000, // 25 hours
age: 3600, // 1 hour
},
},
});
@ -120,10 +120,10 @@ export function getBillingQueue() {
connection: redisConnection,
defaultJobOptions: {
removeOnComplete: {
age: 90000, // 25 hours
age: 3600, // 1 hour
},
removeOnFail: {
age: 90000, // 25 hours
age: 3600, // 1 hour
},
},
});

View File

@ -61,7 +61,6 @@ import {
} from "../lib/concurrency-limit";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { BLOCKLISTED_URL_MESSAGE } from "../lib/strings";
import { indexPage } from "../lib/extract/index/pinecone";
import { Document } from "../controllers/v1/types";
import {
ExtractResult,
@ -85,9 +84,11 @@ import https from "https";
import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
import { RateLimiterMode } from "../types";
import { calculateCreditsToBeBilled } from "../lib/scrape-billing";
import { redisEvictConnection } from "./redis";
import { generateURLSplits, hashURL, index_supabase_service, useIndex } from "./index";
import { WebCrawler } from "../scraper/WebScraper/crawler";
import type { Logger } from "winston";
configDotenv();
@ -320,7 +321,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
scrapeOptions: sc.scrapeOptions,
crawlerOptions: sc.crawlerOptions,
origin: job.data.origin,
});
}, false, job.data.internalOptions?.bypassBilling ?? false);
logger.info("Logged crawl!");
const data = {
@ -372,8 +373,10 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
origin: job.data.origin,
},
true,
job.data.internalOptions?.bypassBilling ?? false,
);
// v1 web hooks, call when done with no data, but with event completed
if (job.data.v1 && job.data.webhook) {
callWebhook(
@ -1133,6 +1136,59 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
}
}
async function billScrapeJob(job: Job & { id: string }, document: Document, logger: Logger, costTracking?: CostTracking) {
let creditsToBeBilled: number | null = null;
if (job.data.is_scrape !== true && !job.data.internalOptions?.bypassBilling) {
creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, document, job.id, costTracking);
if (
job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
process.env.USE_DB_AUTHENTICATION === "true"
) {
try {
const billingJobId = uuidv4();
logger.debug(
`Adding billing job to queue for team ${job.data.team_id}`,
{
billingJobId,
credits: creditsToBeBilled,
is_extract: false,
},
);
// Add directly to the billing queue - the billing worker will handle the rest
await getBillingQueue().add(
"bill_team",
{
team_id: job.data.team_id,
subscription_id: undefined,
credits: creditsToBeBilled,
is_extract: false,
timestamp: new Date().toISOString(),
originating_job_id: job.id,
},
{
jobId: billingJobId,
priority: 10,
},
);
return creditsToBeBilled;
} catch (error) {
logger.error(
`Failed to add billing job to queue for team ${job.data.team_id} for ${creditsToBeBilled} credits`,
{ error },
);
Sentry.captureException(error);
return creditsToBeBilled;
}
}
}
return creditsToBeBilled;
}
async function processJob(job: Job & { id: string }, token: string) {
const logger = _logger.child({
module: "queue-worker",
@ -1143,25 +1199,9 @@ async function processJob(job: Job & { id: string }, token: string) {
teamId: job.data?.team_id ?? undefined,
});
logger.info(`🐂 Worker taking job ${job.id}`, { url: job.data.url });
const start = Date.now();
const start = job.data.startTime ?? Date.now();
const remainingTime = job.data.scrapeOptions.timeout ? (job.data.scrapeOptions.timeout - (Date.now() - start)) : undefined;
// Check if the job URL is researchhub and block it immediately
// TODO: remove this once solve the root issue
// if (
// job.data.url &&
// (job.data.url.includes("researchhub.com") ||
// job.data.url.includes("ebay.com"))
// ) {
// logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`);
// const data = {
// success: false,
// document: null,
// project_id: job.data.project_id,
// error:
// "URL is blocked. Suspecious activity detected. Please contact help@firecrawl.com if you believe this is an error.",
// };
// return data;
// }
const costTracking = new CostTracking();
try {
@ -1172,6 +1212,11 @@ async function processJob(job: Job & { id: string }, token: string) {
current_url: "",
});
if (remainingTime !== undefined && remainingTime < 0) {
throw new Error("timeout");
}
const signal = remainingTime ? AbortSignal.timeout(remainingTime) : undefined;
if (job.data.crawl_id) {
const sc = (await getCrawl(job.data.crawl_id)) as StoredCrawl;
if (sc && sc.cancelled) {
@ -1185,16 +1230,22 @@ async function processJob(job: Job & { id: string }, token: string) {
token,
costTracking,
}),
...(job.data.scrapeOptions.timeout !== undefined
...(remainingTime !== undefined
? [
(async () => {
await sleep(job.data.scrapeOptions.timeout);
await sleep(remainingTime);
throw new Error("timeout");
})(),
]
: []),
]);
try {
signal?.throwIfAborted();
} catch (e) {
throw new Error("timeout");
}
if (!pipeline.success) {
throw pipeline.error;
}
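The timeout now measures from job.data.startTime (stamped when the job was created) rather than from when the worker picks the job up, so time spent waiting in the queue counts against the request's budget. A condensed sketch of the pattern, stitched together from the pieces shown above; the pipeline entry point name is assumed:

    const start = job.data.startTime ?? Date.now();
    const remainingTime = job.data.scrapeOptions.timeout
      ? job.data.scrapeOptions.timeout - (Date.now() - start)
      : undefined;
    if (remainingTime !== undefined && remainingTime < 0) {
      throw new Error("timeout"); // budget already spent while queued
    }
    const signal = remainingTime ? AbortSignal.timeout(remainingTime) : undefined;
    const pipeline = await Promise.race([
      startWebScraperPipeline({ job, token, costTracking }), // name assumed
      ...(remainingTime !== undefined
        ? [sleep(remainingTime).then(() => { throw new Error("timeout"); })]
        : []),
    ]);
    signal?.throwIfAborted(); // re-checked again later, right before billing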
@ -1295,45 +1346,6 @@ async function processJob(job: Job & { id: string }, token: string) {
}
}
logger.debug("Logging job to DB...");
await logJob(
{
job_id: job.id as string,
success: true,
num_docs: 1,
docs: [doc],
time_taken: timeTakenInSeconds,
team_id: job.data.team_id,
mode: job.data.mode,
url: job.data.url,
crawlerOptions: sc.crawlerOptions,
scrapeOptions: job.data.scrapeOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
},
true,
);
if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
logger.debug("Calling webhook with success...", {
webhook: job.data.webhook,
});
await callWebhook(
job.data.team_id,
job.data.crawl_id,
data,
job.data.webhook,
job.data.v1,
job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
true,
);
}
logger.debug("Declaring job as done...");
await addCrawlJobDone(job.data.crawl_id, job.id, true);
if (job.data.crawlerOptions !== null) {
if (!sc.cancelled) {
const crawler = crawlToCrawler(
@ -1429,8 +1441,65 @@ async function processJob(job: Job & { id: string }, token: string) {
}
}
try {
signal?.throwIfAborted();
} catch (e) {
throw new Error("timeout");
}
const credits_billed = await billScrapeJob(job, doc, logger, costTracking);
logger.debug("Logging job to DB...");
await logJob(
{
job_id: job.id as string,
success: true,
num_docs: 1,
docs: [doc],
time_taken: timeTakenInSeconds,
team_id: job.data.team_id,
mode: job.data.mode,
url: job.data.url,
crawlerOptions: sc.crawlerOptions,
scrapeOptions: job.data.scrapeOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
credits_billed,
},
true,
job.data.internalOptions?.bypassBilling ?? false,
);
if (job.data.webhook && job.data.mode !== "crawl" && job.data.v1) {
logger.debug("Calling webhook with success...", {
webhook: job.data.webhook,
});
await callWebhook(
job.data.team_id,
job.data.crawl_id,
data,
job.data.webhook,
job.data.v1,
job.data.crawlerOptions !== null ? "crawl.page" : "batch_scrape.page",
true,
);
}
logger.debug("Declaring job as done...");
await addCrawlJobDone(job.data.crawl_id, job.id, true);
await finishCrawlIfNeeded(job, sc);
} else {
try {
signal?.throwIfAborted();
} catch (e) {
throw new Error("timeout");
}
const credits_billed = await billScrapeJob(job, doc, logger, costTracking);
await logJob({
job_id: job.id,
success: true,
@ -1446,66 +1515,8 @@ async function processJob(job: Job & { id: string }, token: string) {
num_tokens: 0, // TODO: fix
cost_tracking: costTracking,
pdf_num_pages: doc.metadata.numPages,
});
}
if (job.data.is_scrape !== true) {
let creditsToBeBilled = 1; // Assuming 1 credit per document
if ((job.data.scrapeOptions.extract && job.data.scrapeOptions.formats?.includes("extract")) || (job.data.scrapeOptions.formats?.includes("changeTracking") && job.data.scrapeOptions.changeTrackingOptions?.modes?.includes("json"))) {
creditsToBeBilled = 5;
}
if (job.data.scrapeOptions.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.extract?.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
if (process.env.USE_DB_AUTHENTICATION === "true") {
creditsToBeBilled = Math.ceil((costTracking.toJSON().totalCost ?? 1) * 1800);
} else {
creditsToBeBilled = 150;
}
}
if (doc.metadata?.proxyUsed === "stealth") {
creditsToBeBilled += 4;
}
if (
job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
process.env.USE_DB_AUTHENTICATION === "true"
) {
try {
const billingJobId = uuidv4();
logger.debug(
`Adding billing job to queue for team ${job.data.team_id}`,
{
billingJobId,
credits: creditsToBeBilled,
is_extract: false,
},
);
// Add directly to the billing queue - the billing worker will handle the rest
await getBillingQueue().add(
"bill_team",
{
team_id: job.data.team_id,
subscription_id: undefined,
credits: creditsToBeBilled,
is_extract: false,
timestamp: new Date().toISOString(),
originating_job_id: job.id,
},
{
jobId: billingJobId,
priority: 10,
},
);
} catch (error) {
logger.error(
`Failed to add billing job to queue for team ${job.data.team_id} for ${creditsToBeBilled} credits`,
{ error },
);
Sentry.captureException(error);
}
}
credits_billed,
}, false, job.data.internalOptions?.bypassBilling ?? false);
}
logger.info(`🐂 Job done ${job.id}`);
@ -1604,6 +1615,7 @@ async function processJob(job: Job & { id: string }, token: string) {
cost_tracking: costTracking,
},
true,
job.data.internalOptions?.bypassBilling ?? false,
);
return data;
}

View File

@ -44,9 +44,15 @@ export interface WebScraperOptions {
sitemapped?: boolean;
webhook?: z.infer<typeof webhookSchema>;
v1?: boolean;
/**
* Disables billing on the worker side.
*/
is_scrape?: boolean;
isCrawlSourceScrape?: boolean;
from_extract?: boolean;
startTime?: number;
}
export interface RunWebScraperParams {
@ -95,6 +101,7 @@ export interface FirecrawlJob {
sources?: Record<string, string[]>;
cost_tracking?: CostTracking;
pdf_num_pages?: number;
credits_billed?: number | null;
}
export interface FirecrawlScrapeResponse {

View File

@ -680,7 +680,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "firecrawl"
version = "1.1.0"
version = "1.2.0"
dependencies = [
"assert_matches",
"axum",

View File

@ -1,7 +1,7 @@
[package]
name = "firecrawl"
author= "Mendable.ai"
version = "1.1.0"
version = "1.2.0"
edition = "2021"
license = "MIT"
homepage = "https://www.firecrawl.dev/"

View File

@ -99,6 +99,49 @@ impl From<CrawlScrapeOptions> for ScrapeOptions {
}
}
/// Options for webhook notifications
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct WebhookOptions {
/// URL to send webhook notifications to
pub url: String,
/// Custom headers to include in webhook requests
pub headers: Option<HashMap<String, String>>,
/// Custom data included in all webhook payloads
pub metadata: Option<HashMap<String, String>>,
/// Event types to receive
pub events: Option<Vec<WebhookEvent>>,
}
impl From<String> for WebhookOptions {
fn from(value: String) -> Self {
Self {
url: value,
..Default::default()
}
}
}
#[derive(Deserialize, Serialize, Debug, PartialEq, Eq, Clone, Copy)]
#[serde(rename_all = "camelCase")]
pub enum WebhookEvent {
/// Crawl finished successfully
Completed,
/// Crawl encountered an error
Failed,
/// Individual page scraped
Page,
/// Crawl job initiated
Started,
}
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
@ -132,7 +175,7 @@ pub struct CrawlOptions {
pub allow_external_links: Option<bool>,
/// URL to send Webhook crawl events to.
pub webhook: Option<String>,
pub webhook: Option<WebhookOptions>,
/// Idempotency key to send to the crawl endpoint.
#[serde(skip)]