From fd7fdc1d52eba80bffdcbd51ac446519d5bd7593 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:28:54 -0300 Subject: [PATCH 1/3] added blocklist middleware --- .../src/__tests__/e2e_v1_withAuth/index.test.ts | 14 ++++++++++++++ apps/api/src/routes/v1.ts | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index 6d2a0a14..b44a7811 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -36,6 +36,20 @@ describe("E2E Tests for v1 API Routes", () => { expect(response.statusCode).toBe(401); }); + const scrapeRequest: ScrapeRequest = { + url: "https://facebook.com/fake-test", + }; + + const response = await request(TEST_URL) + .post("/v1/scrape") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send(scrapeRequest); + + expect(response.statusCode).toBe(403); + expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."); + }); + it.concurrent( "should return an error response with an invalid API key", async () => { diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index ea555987..0807bc0f 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -15,6 +15,7 @@ import { checkTeamCredits } from "../services/billing/credit_billing"; import { v4 as uuidv4 } from "uuid"; import expressWs from "express-ws"; import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws"; +import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { searchController } from "../../src/controllers/v1/search"; @@ -69,6 +70,13 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) .catch(err => next(err)); } +function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { + if (isUrlBlocked(req.body.url)) { + return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." }); + } + next(); +} + function wrap(controller: (req: Request, res: Response) => Promise): (req: Request, res: Response, next: NextFunction) => any { return (req, res, next) => { controller(req, res) @@ -82,6 +90,7 @@ export const v1Router = express.Router(); v1Router.post( "/scrape", + blocklistMiddleware, authMiddleware(RateLimiterMode.Scrape), checkCreditsMiddleware(1), wrap(scrapeController) @@ -89,6 +98,7 @@ v1Router.post( v1Router.post( "/crawl", + blocklistMiddleware, authMiddleware(RateLimiterMode.Crawl), idempotencyMiddleware, checkCreditsMiddleware(1), @@ -97,6 +107,7 @@ v1Router.post( v1Router.post( "/map", + blocklistMiddleware, authMiddleware(RateLimiterMode.Crawl), checkCreditsMiddleware(1), wrap(mapController) From 72461ce9a64405064fa6e3482cf58a02728a236d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:29:52 -0300 Subject: [PATCH 2/3] Update index.test.ts --- apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index b44a7811..d10a286f 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -36,6 +36,7 @@ describe("E2E Tests for v1 API Routes", () => { expect(response.statusCode).toBe(401); }); + it.concurrent("should throw error for blocklisted URL", async () => { const scrapeRequest: ScrapeRequest = { url: "https://facebook.com/fake-test", }; From 3dc298be5483792f866c64de7a74d46949957cc2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 19 Aug 2024 13:52:54 -0300 Subject: [PATCH 3/3] Nick: 2x rate limits for standard and growth for /scrape --- apps/api/src/services/rate-limiter.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 05fb102c..f1399b13 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -20,14 +20,14 @@ const RATE_LIMITS = { default: 20, free: 5, starter: 20, - standard: 50, + standard: 100, standardOld: 40, scale: 500, hobby: 10, - standardNew: 50, - standardnew: 50, - growth: 500, - growthdouble: 500, + standardNew: 100, + standardnew: 100, + growth: 1000, + growthdouble: 1000, }, search: { default: 20,