From 40eacfacd4c5a782f722fd85e3935f4a1741b27a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 16:41:21 -0300 Subject: [PATCH 01/13] Update runWebScraper.ts --- apps/api/src/main/runWebScraper.ts | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index c6751218..d9ced895 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -146,25 +146,25 @@ export async function runWebScraper({ EngineResultsTracker[Engine], undefined >; - ScrapeEvents.insert(bull_job_id, { - type: "scrape", - url, - method: engine, - result: { - success: result.state === "success", - response_code: - result.state === "success" ? result.result.statusCode : undefined, - response_size: - result.state === "success" ? result.result.html.length : undefined, - error: - result.state === "error" - ? result.error - : result.state === "timeout" - ? "Timed out" - : undefined, - time_taken: result.finishedAt - result.startedAt, - }, - }); + // ScrapeEvents.insert(bull_job_id, { + // type: "scrape", + // url, + // method: engine, + // result: { + // success: result.state === "success", + // response_code: + // result.state === "success" ? result.result.statusCode : undefined, + // response_size: + // result.state === "success" ? result.result.html.length : undefined, + // error: + // result.state === "error" + // ? result.error + // : result.state === "timeout" + // ? 
"Timed out" + // : undefined, + // time_taken: result.finishedAt - result.startedAt, + // }, + // }); } if (error === undefined && response?.success) { @@ -219,7 +219,7 @@ const saveJob = async ( // // I think the job won't exist here anymore // } } - ScrapeEvents.logJobEvent(job, "completed"); + // ScrapeEvents.logJobEvent(job, "completed"); } catch (error) { _logger.error(`🐂 Failed to update job status`, { module: "runWebScraper", From 1de5a2c589ccf7fe0dfeea652f7e823b2caf4499 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 16:57:23 -0300 Subject: [PATCH 02/13] Update batch_billing.ts --- apps/api/src/services/billing/batch_billing.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/billing/batch_billing.ts b/apps/api/src/services/billing/batch_billing.ts index caa2c0e0..8acf3aef 100644 --- a/apps/api/src/services/billing/batch_billing.ts +++ b/apps/api/src/services/billing/batch_billing.ts @@ -10,8 +10,8 @@ import { getACUC, setCachedACUC } from "../../controllers/auth"; const BATCH_KEY = "billing_batch"; const BATCH_LOCK_KEY = "billing_batch_lock"; const BATCH_SIZE = 50; // Batch size for processing -const BATCH_TIMEOUT = 15000; // 15 seconds processing interval -const LOCK_TIMEOUT = 30000; // 30 seconds lock timeout +const BATCH_TIMEOUT = 30000; // 30 seconds processing interval +const LOCK_TIMEOUT = 60000; // 60 seconds lock timeout // Define interfaces for billing operations interface BillingOperation { From 72d894c2a33689e898602121c26f76f8d7293ffb Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:06:42 -0300 Subject: [PATCH 03/13] Update rate-limiter.ts --- apps/api/src/services/rate-limiter.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 4b4af827..480c18a4 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -137,8 +137,8 @@ const 
RATE_LIMITS = { extract_pro: 1000, }, preview: { - free: 5, - default: 5, + free: 0, + default: 0, }, account: { free: 100, From e6c3f209445158afcd1b048bacc1e924509da10a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 6 Mar 2025 21:08:09 +0100 Subject: [PATCH 04/13] fix(preview): temporarily disable --- apps/api/src/controllers/auth.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index a5aed8ff..f21eaf92 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -201,6 +201,8 @@ export async function supaAuthenticateUser( let chunk: AuthCreditUsageChunk | null = null; let plan: PlanType = "free"; if (token == "this_is_just_a_preview_token") { + throw new Error("Unauthenticated Playground calls are temporarily disabled due to abuse. Please sign up."); + if (mode == RateLimiterMode.CrawlStatus) { rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); } else if (mode == RateLimiterMode.ExtractStatus) { From ae010a76c19e5246ccf97d716d66c663296725f4 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:11:00 -0300 Subject: [PATCH 05/13] Update blocklist.ts --- apps/api/src/scraper/WebScraper/utils/blocklist.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 02aab89f..21889795 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -67,6 +67,7 @@ const urlBlocklist = [ "vMdzZ33BXoyWVZnAPOBcrg==", "l8GDVI8w/ueHnNzdN1ODuQ==", "+yz9bnYYMnC0trJZGJwf6Q==", + "oTdhIjEjqdT2pEvyxD1Ssg==", ] const allowedKeywords = [ @@ -155,4 +156,4 @@ export function isUrlBlocked(url: string): boolean { } return false; -} +} \ No newline at end of file From 7b055120548dd6589c742b569963981c1508215a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 6 Mar 2025 21:30:57 +0100 Subject: [PATCH 06/13] fix(credit_billing): teams check --- apps/api/src/services/billing/credit_billing.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 47b5eeab..564cbd4b 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -1,13 +1,10 @@ import { NotificationType } from "../../types"; import { withAuth } from "../../lib/withAuth"; import { sendNotification } from "../notification/email_notification"; -import { supabase_service } from "../supabase"; +import { supabase_rr_service, supabase_service } from "../supabase"; import { logger } from "../../lib/logger"; import * as Sentry from "@sentry/node"; import { AuthCreditUsageChunk } from "../../controllers/v1/types"; -import { getACUC, setCachedACUC } from "../../controllers/auth"; -import { issueCredits } from "./issue_credits"; -import { redlock } from "../redlock"; import { autoCharge } from "./auto_charge"; import { getValue, setValue } from "../redis"; import { queueBillingOperation } from "./batch_billing"; @@ -117,7 +114,7 @@ export async function supaCheckTeamCredits( isAutoRechargeEnabled = parsedData.auto_recharge; autoRechargeThreshold = parsedData.auto_recharge_threshold; } else { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("teams") .select("auto_recharge, auto_recharge_threshold") .eq("id", team_id) From 57b313608cb6e9f3c123e4754c653fef8dfacbdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 6 Mar 2025 21:35:30 +0100 Subject: [PATCH 07/13] fix(auth): always use replica for acuc --- apps/api/src/controllers/auth.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts 
index f21eaf92..03175722 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -100,7 +100,7 @@ export async function getACUC( ? "auth_credit_usage_chunk_extract" : "auth_credit_usage_chunk_test_22_credit_pack_n_extract"; while (retries < maxRetries) { - const client = Math.random() > 0.75 ? supabase_rr_service : supabase_service; + const client = supabase_rr_service; ({ data, error } = await client.rpc( rpcName, { input_key: api_key }, From 4c4d51e60bbd537c4a7dd73fe075c909c429245b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:36:08 -0300 Subject: [PATCH 08/13] Revert "Update runWebScraper.ts" This reverts commit 40eacfacd4c5a782f722fd85e3935f4a1741b27a. --- apps/api/src/main/runWebScraper.ts | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index d9ced895..c6751218 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -146,25 +146,25 @@ export async function runWebScraper({ EngineResultsTracker[Engine], undefined >; - // ScrapeEvents.insert(bull_job_id, { - // type: "scrape", - // url, - // method: engine, - // result: { - // success: result.state === "success", - // response_code: - // result.state === "success" ? result.result.statusCode : undefined, - // response_size: - // result.state === "success" ? result.result.html.length : undefined, - // error: - // result.state === "error" - // ? result.error - // : result.state === "timeout" - // ? "Timed out" - // : undefined, - // time_taken: result.finishedAt - result.startedAt, - // }, - // }); + ScrapeEvents.insert(bull_job_id, { + type: "scrape", + url, + method: engine, + result: { + success: result.state === "success", + response_code: + result.state === "success" ? result.result.statusCode : undefined, + response_size: + result.state === "success" ? 
result.result.html.length : undefined, + error: + result.state === "error" + ? result.error + : result.state === "timeout" + ? "Timed out" + : undefined, + time_taken: result.finishedAt - result.startedAt, + }, + }); } if (error === undefined && response?.success) { @@ -219,7 +219,7 @@ const saveJob = async ( // // I think the job won't exist here anymore // } } - // ScrapeEvents.logJobEvent(job, "completed"); + ScrapeEvents.logJobEvent(job, "completed"); } catch (error) { _logger.error(`🐂 Failed to update job status`, { module: "runWebScraper", From 783fad90ddabcef79d01460edb36d00c8e1a3734 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:45:51 -0300 Subject: [PATCH 09/13] Nick: more read replicas --- apps/api/src/lib/extract/team-id-sync.ts | 4 ++-- apps/api/src/services/billing/auto_charge.ts | 4 ++-- apps/api/src/services/webhook.ts | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/api/src/lib/extract/team-id-sync.ts b/apps/api/src/lib/extract/team-id-sync.ts index 8cf21a14..90a32651 100644 --- a/apps/api/src/lib/extract/team-id-sync.ts +++ b/apps/api/src/lib/extract/team-id-sync.ts @@ -1,9 +1,9 @@ -import { supabase_service } from "../../services/supabase"; +import { supabase_rr_service, supabase_service } from "../../services/supabase"; import { logger } from "../logger"; export async function getTeamIdSyncB(teamId: string) { try { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("eb-sync") .select("team_id") .eq("team_id", teamId) diff --git a/apps/api/src/services/billing/auto_charge.ts b/apps/api/src/services/billing/auto_charge.ts index ace14c6e..b30be201 100644 --- a/apps/api/src/services/billing/auto_charge.ts +++ b/apps/api/src/services/billing/auto_charge.ts @@ -2,7 +2,7 @@ import { AuthCreditUsageChunk } from "../../controllers/v1/types"; import { getACUC } from "../../controllers/auth"; import { redlock } from "../redlock"; -import { supabase_service } 
from "../supabase"; +import { supabase_rr_service, supabase_service } from "../supabase"; import { createPaymentIntent } from "./stripe"; import { issueCredits } from "./issue_credits"; import { sendNotification, sendNotificationWithCustomDays } from "../notification/email_notification"; @@ -124,7 +124,7 @@ export async function autoCharge( if (chunk.sub_user_id) { // Fetch the customer's Stripe information const { data: customer, error: customersError } = - await supabase_service + await supabase_rr_service .from("customers") .select("id, stripe_customer_id") .eq("id", chunk.sub_user_id) diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index 75adfe76..6ba9649a 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,6 +1,6 @@ import axios from "axios"; import { logger } from "../lib/logger"; -import { supabase_service } from "./supabase"; +import { supabase_rr_service, supabase_service } from "./supabase"; import { WebhookEventType } from "../types"; import { configDotenv } from "dotenv"; import { z } from "zod"; @@ -36,7 +36,7 @@ export const callWebhook = async ( // Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set // and the USE_DB_AUTHENTICATION environment variable is set to true if (!webhookUrl && useDbAuthentication) { - const { data: webhooksData, error } = await supabase_service + const { data: webhooksData, error } = await supabase_rr_service .from("webhooks") .select("url") .eq("team_id", teamId) From 5a149a1c3071d0ba1b7e5f37f4f20950f3f12992 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:53:28 -0300 Subject: [PATCH 10/13] Revert "fix(auth): always use replica for acuc" This reverts commit 57b313608cb6e9f3c123e4754c653fef8dfacbdf. 
--- apps/api/src/controllers/auth.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 03175722..f21eaf92 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -100,7 +100,7 @@ export async function getACUC( ? "auth_credit_usage_chunk_extract" : "auth_credit_usage_chunk_test_22_credit_pack_n_extract"; while (retries < maxRetries) { - const client = supabase_rr_service; + const client = Math.random() > 0.75 ? supabase_rr_service : supabase_service; ({ data, error } = await client.rpc( rpcName, { input_key: api_key }, From 982b3da27d28e22295b29c5b17a6415a0d15e280 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 6 Mar 2025 17:53:41 -0300 Subject: [PATCH 11/13] Update auth.ts --- apps/api/src/controllers/auth.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index f21eaf92..b0843cdf 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -100,7 +100,7 @@ export async function getACUC( ? "auth_credit_usage_chunk_extract" : "auth_credit_usage_chunk_test_22_credit_pack_n_extract"; while (retries < maxRetries) { - const client = Math.random() > 0.75 ? supabase_rr_service : supabase_service; + const client = Math.random() > 0.5 ? 
supabase_rr_service : supabase_service; ({ data, error } = await client.rpc( rpcName, { input_key: api_key }, From a1e6c13b6720594f70788d8b1153606db11c2f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 6 Mar 2025 21:47:10 +0100 Subject: [PATCH 12/13] move crawl to read replica --- apps/api/src/controllers/v1/crawl-status.ts | 4 ++-- apps/api/src/lib/supabase-jobs.ts | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts index 9a5ecaad..96aa578e 100644 --- a/apps/api/src/controllers/v1/crawl-status.ts +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -21,7 +21,7 @@ import { import { configDotenv } from "dotenv"; import type { Job, JobState, Queue } from "bullmq"; import { logger } from "../../lib/logger"; -import { supabase_service } from "../../services/supabase"; +import { supabase_rr_service, supabase_service } from "../../services/supabase"; import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit"; configDotenv(); @@ -246,7 +246,7 @@ export async function crawlStatusController( let totalCount = jobIDs.length; if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") { - const x = await supabase_service + const x = await supabase_rr_service .from('firecrawl_jobs') .select('*', { count: 'exact', head: true }) .eq("crawl_id", req.params.jobId) diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts index 2ed7c02a..e36f3b97 100644 --- a/apps/api/src/lib/supabase-jobs.ts +++ b/apps/api/src/lib/supabase-jobs.ts @@ -1,4 +1,4 @@ -import { supabase_service } from "../services/supabase"; +import { supabase_rr_service, supabase_service } from "../services/supabase"; import { logger } from "./logger"; import * as Sentry from "@sentry/node"; @@ -8,7 +8,7 @@ import * as Sentry from "@sentry/node"; * @returns {any | null} Job */ export const supabaseGetJobById = async (jobId: 
string) => { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("firecrawl_jobs") .select("*") .eq("job_id", jobId) @@ -31,7 +31,7 @@ export const supabaseGetJobById = async (jobId: string) => { * @returns {any[]} Jobs */ export const supabaseGetJobsById = async (jobIds: string[]) => { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("firecrawl_jobs") .select() .in("job_id", jobIds); @@ -55,7 +55,7 @@ export const supabaseGetJobsById = async (jobIds: string[]) => { * @returns {any[]} Jobs */ export const supabaseGetJobsByCrawlId = async (crawlId: string) => { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("firecrawl_jobs") .select() .eq("crawl_id", crawlId); @@ -74,7 +74,7 @@ export const supabaseGetJobsByCrawlId = async (crawlId: string) => { }; export const supabaseGetJobByIdOnlyData = async (jobId: string) => { - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("firecrawl_jobs") .select("docs, team_id") .eq("job_id", jobId) From 39b13902897e20cd9cacc724016d35593dfc410c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 6 Mar 2025 21:57:33 +0100 Subject: [PATCH 13/13] fix: crawl --- apps/api/src/services/idempotency/validate.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/idempotency/validate.ts b/apps/api/src/services/idempotency/validate.ts index 5a347f67..54ec7bd0 100644 --- a/apps/api/src/services/idempotency/validate.ts +++ b/apps/api/src/services/idempotency/validate.ts @@ -1,5 +1,5 @@ import { Request } from "express"; -import { supabase_service } from "../supabase"; +import { supabase_rr_service, supabase_service } from "../supabase"; import { validate as isUuid } from "uuid"; import { logger } from "../../../src/lib/logger"; @@ -18,7 +18,7 @@ export async 
function validateIdempotencyKey(req: Request): Promise<boolean> { return false; } - const { data, error } = await supabase_service + const { data, error } = await supabase_rr_service .from("idempotency_keys") .select("key") .eq("key", idempotencyKey);