Block social-media URLs on the v1 scrape, crawl and map routes.

NOTE(review): this patch was stored with its line breaks and indentation
collapsed, so the leading whitespace of every line below is reconstructed.
Apply with `git apply --ignore-whitespace` and confirm against the target tree.
The `index` blob hashes are carried over unchanged and may not match the
post-image produced by this patch.

diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
index 6d2a0a14..b44a7811 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
@@ -36,6 +36,21 @@ describe("E2E Tests for v1 API Routes", () => {
       expect(response.statusCode).toBe(401);
     });
 
+    it.concurrent("should throw error for blocklisted URL", async () => {
+      const scrapeRequest: ScrapeRequest = {
+        url: "https://facebook.com/fake-test",
+      };
+
+      const response = await request(TEST_URL)
+        .post("/v1/scrape")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send(scrapeRequest);
+
+      expect(response.statusCode).toBe(403);
+      expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
+    });
+
     it.concurrent(
       "should return an error response with an invalid API key",
       async () => {
diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index ea555987..0807bc0f 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -15,6 +15,7 @@ import { checkTeamCredits } from "../services/billing/credit_billing";
 import { v4 as uuidv4 } from "uuid";
 import expressWs from "express-ws";
 import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
+import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";
@@ -69,6 +70,22 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) {
         .catch(err => next(err));
 }
 
+/**
+ * Short-circuits requests that target a blocklisted URL.
+ *
+ * Responds with HTTP 403 and a JSON error body when `isUrlBlocked` reports
+ * `req.body.url` as blocked; otherwise passes control to the next handler.
+ * A missing or non-string `url` is passed through untouched so a malformed
+ * body is never handed to `isUrlBlocked`.
+ */
+function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
+    const url: unknown = req.body?.url;
+    if (typeof url === "string" && isUrlBlocked(url)) {
+        return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
+    }
+    next();
+}
+
 function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
     return (req, res, next) => {
         controller(req, res)
@@ -82,6 +99,7 @@ export const v1Router = express.Router();
 
 v1Router.post(
     "/scrape",
+    blocklistMiddleware,
     authMiddleware(RateLimiterMode.Scrape),
     checkCreditsMiddleware(1),
     wrap(scrapeController)
@@ -89,6 +107,7 @@ v1Router.post(
 
 v1Router.post(
     "/crawl",
+    blocklistMiddleware,
     authMiddleware(RateLimiterMode.Crawl),
     idempotencyMiddleware,
     checkCreditsMiddleware(1),
@@ -97,6 +116,7 @@ v1Router.post(
 
 v1Router.post(
     "/map",
+    blocklistMiddleware,
     authMiddleware(RateLimiterMode.Crawl),
     checkCreditsMiddleware(1),
     wrap(mapController)