mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-22 18:01:21 +08:00
commit
6b41916e1a
@ -123,7 +123,7 @@ function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
|
|||||||
return res.status(403).json({
|
return res.status(403).json({
|
||||||
success: false,
|
success: false,
|
||||||
error:
|
error:
|
||||||
"URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions.",
|
"URL is blocked intentionally. Firecrawl currently does not support scraping this site due to policy restrictions.",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,26 +1,51 @@
|
|||||||
import { logger } from "../../../lib/logger";
|
import { logger } from "../../../lib/logger";
|
||||||
|
import crypto from "crypto";
|
||||||
|
import { configDotenv } from "dotenv";
|
||||||
|
configDotenv();
|
||||||
|
|
||||||
const socialMediaBlocklist = [
|
const hashKey = Buffer.from(process.env.HASH_KEY || "", "utf-8");
|
||||||
"facebook.com",
|
const algorithm = "aes-256-ecb";
|
||||||
"x.com",
|
|
||||||
"twitter.com",
|
function decryptAES(ciphertext: string, key: Buffer): string {
|
||||||
"instagram.com",
|
const decipher = crypto.createDecipheriv(algorithm, key, null);
|
||||||
"linkedin.com",
|
const decrypted = Buffer.concat([
|
||||||
"snapchat.com",
|
decipher.update(Buffer.from(ciphertext, "base64")),
|
||||||
"tiktok.com",
|
decipher.final(),
|
||||||
"reddit.com",
|
]);
|
||||||
"tumblr.com",
|
return decrypted.toString("utf-8");
|
||||||
"flickr.com",
|
}
|
||||||
"whatsapp.com",
|
|
||||||
"wechat.com",
|
const urlBlocklist = [
|
||||||
"telegram.org",
|
"h8ngAFXUNLO3ZqQufJjGVA==",
|
||||||
"researchhub.com",
|
"fEGiDm/TWDBkXUXejFVICg==",
|
||||||
"youtube.com",
|
"l6Mei7IGbEmTTFoSudUnqQ==",
|
||||||
"corterix.com",
|
"4OjallJzXRiZUAWDiC2Xww==",
|
||||||
"southwest.com",
|
"ReSvkSfx34TNEdecmmSDdQ==",
|
||||||
"ryanair.com",
|
"X1E4WtdmXAv3SAX9xN925Q==",
|
||||||
|
"VTzBQfMtXZzM05mnNkWkjA==",
|
||||||
|
"m/q4Lb2Z8cxwU7/CoztOFg==",
|
||||||
|
"UbVnmRaeG+gKcyVDLAm0vg==",
|
||||||
|
"xNQhczYG22tTVc6lYE3qwg==",
|
||||||
|
"CQfGDydbg4l1swRCru6O6Q==",
|
||||||
|
"l86LQxm2NonTWMauXwEsPw==",
|
||||||
|
"6v4QDUcwjnID80G+uU+tgw==",
|
||||||
|
"pCF/6nrKZAxaYntzEGluZQ==",
|
||||||
|
"r0CRhAmQqSe7V2s3073T00sAh4WcS5779jwuGJ26ows==",
|
||||||
|
"aBOVqRFBM4UVg33usY10NdiF0HCnFH/ImtD0n+zIpc8==",
|
||||||
|
"QV436UZuQ6D0Dqrx9MwaGw==",
|
||||||
|
"OYVvrwILYbzA2mSSqOPPpw==",
|
||||||
|
"xW2i4C0Dzcnp+qu12u0SAw==",
|
||||||
|
"OLHba209l0dfl0MI4EnQonBITK9z8Qwgd/NsuaTkXmA=",
|
||||||
|
"X0VynmNjpL3PrYxpUIG7sFMBt8OlrmQWtxj8oXVu2QM=",
|
||||||
|
"ObdlM5NEkvBJ/sojRW5K/Q==",
|
||||||
|
"C8Th38X0SjsE1vL/OsD8bA==",
|
||||||
|
"PTbGg8PK/h0Seyw4HEpK4Q==",
|
||||||
|
"lZdQMknjHb7+4+sjF3qNTw==",
|
||||||
|
"LsgSq54q5oDysbva29JxnQ==",
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const decryptedBlocklist = hashKey.length > 0 ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey)) : [];
|
||||||
|
|
||||||
const allowedKeywords = [
|
const allowedKeywords = [
|
||||||
"pulse",
|
"pulse",
|
||||||
"privacy",
|
"privacy",
|
||||||
@ -65,7 +90,7 @@ export function isUrlBlocked(url: string): boolean {
|
|||||||
const hostname = urlObj.hostname.toLowerCase();
|
const hostname = urlObj.hostname.toLowerCase();
|
||||||
|
|
||||||
// Check if the URL matches any domain in the blocklist
|
// Check if the URL matches any domain in the blocklist
|
||||||
const isBlocked = socialMediaBlocklist.some((domain) => {
|
const isBlocked = decryptedBlocklist.some((domain) => {
|
||||||
const domainPattern = new RegExp(
|
const domainPattern = new RegExp(
|
||||||
`(^|\\.)${domain.replace(".", "\\.")}(\\.|$)`,
|
`(^|\\.)${domain.replace(".", "\\.")}(\\.|$)`,
|
||||||
"i",
|
"i",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user