added blocklist middleware

This commit is contained in:
rafaelsideguide 2024-08-19 13:28:54 -03:00
parent ff84f1fe5e
commit fd7fdc1d52
2 changed files with 25 additions and 0 deletions

View File

@ -36,6 +36,20 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(401); expect(response.statusCode).toBe(401);
}); });
const scrapeRequest: ScrapeRequest = {
url: "https://facebook.com/fake-test",
};
const response = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(403);
expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
});
it.concurrent( it.concurrent(
"should return an error response with an invalid API key", "should return an error response with an invalid API key",
async () => { async () => {

View File

@ -15,6 +15,7 @@ import { checkTeamCredits } from "../services/billing/credit_billing";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import expressWs from "express-ws"; import expressWs from "express-ws";
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws"; import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
// import { searchController } from "../../src/controllers/v1/search"; // import { searchController } from "../../src/controllers/v1/search";
@ -69,6 +70,13 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction)
.catch(err => next(err)); .catch(err => next(err));
} }
/**
 * Express middleware that rejects requests targeting a blocklisted URL.
 *
 * Reads `url` from the request body and, when `isUrlBlocked` reports it as
 * blocked, answers with HTTP 403 and a `{ success: false, error }` payload.
 * In every other case control is handed to the next handler in the chain.
 *
 * @param req - Incoming request; only `req.body.url` is inspected.
 * @param res - Response used to send the 403 rejection.
 * @param next - Continuation invoked when the request is not rejected here.
 */
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
  // The body is untrusted client input: it may be absent, lack `url`, or carry
  // a non-string value.
  const candidateUrl: unknown = req.body?.url;

  // Only string values are handed to the blocklist check. Anything else is
  // passed through so downstream handlers can reject it with their own
  // validation error.
  // NOTE(review): assumes isUrlBlocked expects a string argument — confirm
  // against its definition in scraper/WebScraper/utils/blocklist.
  if (typeof candidateUrl === "string" && isUrlBlocked(candidateUrl)) {
    return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
  }

  next();
}
function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any { function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
return (req, res, next) => { return (req, res, next) => {
controller(req, res) controller(req, res)
@ -82,6 +90,7 @@ export const v1Router = express.Router();
v1Router.post( v1Router.post(
"/scrape", "/scrape",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Scrape), authMiddleware(RateLimiterMode.Scrape),
checkCreditsMiddleware(1), checkCreditsMiddleware(1),
wrap(scrapeController) wrap(scrapeController)
@ -89,6 +98,7 @@ v1Router.post(
v1Router.post( v1Router.post(
"/crawl", "/crawl",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Crawl), authMiddleware(RateLimiterMode.Crawl),
idempotencyMiddleware, idempotencyMiddleware,
checkCreditsMiddleware(1), checkCreditsMiddleware(1),
@ -97,6 +107,7 @@ v1Router.post(
v1Router.post( v1Router.post(
"/map", "/map",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Crawl), authMiddleware(RateLimiterMode.Crawl),
checkCreditsMiddleware(1), checkCreditsMiddleware(1),
wrap(mapController) wrap(mapController)