diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 0a3ef705..ba382040 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -6,6 +6,15 @@ configDotenv(); const hashKey = Buffer.from(process.env.HASH_KEY || "", "utf-8"); const algorithm = "aes-256-ecb"; +function encryptAES(plaintext: string, key: Buffer): string { + const cipher = crypto.createCipheriv(algorithm, key, null); + const encrypted = Buffer.concat([ + cipher.update(plaintext, "utf-8"), + cipher.final() + ]); + return encrypted.toString("base64"); +} + function decryptAES(ciphertext: string, key: Buffer): string { const decipher = crypto.createDecipheriv(algorithm, key, null); const decrypted = Buffer.concat([ @@ -42,6 +51,21 @@ const urlBlocklist = [ "PTbGg8PK/h0Seyw4HEpK4Q==", "lZdQMknjHb7+4+sjF3qNTw==", "LsgSq54q5oDysbva29JxnQ==", + "KZfBtpwjOpdSoqacRbz7og==", + "Indtl4yxJMHCKBGF4KABCQ==", + "e3HFXLVgxhaVoadYpwb2BA==", + "b+asgLayXQ5Jq+se+q56jA==", + "86ZDUI7vmp4MvNq3fvZrGQ==", + "sEGFoYZ6GEg4Zocd+TiyfQ==", + "6OOL72eXthgnJ1Hj4PfOQQ==", + "g/ME+Sh1CAFboKrwkVb+5Q==", + "Pw+xawUoX8xBYbX2yqqGWQ==", + "k6vBalxYFhAvkPsF19t9gQ==", + "e3HFXLVgxhaVoadYpwb2BA==", + "b+asgLayXQ5Jq+se+q56jA==", + "KKttwRz4w+AMJrZcB828WQ==", + "vMdzZ33BXoyWVZnAPOBcrg==", + "l8GDVI8w/ueHnNzdN1ODuQ==", ]; const decryptedBlocklist = hashKey.length > 0 ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey)) : []; @@ -104,4 +128,4 @@ export function isUrlBlocked(url: string): boolean { logger.error(`Error parsing the following URL: ${url}`); return false; } -} +} \ No newline at end of file diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 29f4b84f..9fd8861b 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -391,22 +391,21 @@ async function processJob(job: Job & { id: string }, token: string) { // Check if the job URL is researchhub and block it immediately // TODO: remove this once solve the root issue - if ( - job.data.url && - (job.data.url.includes("researchhub.com") || - job.data.url.includes("ebay.com") || - job.data.url.includes("youtube.com")) - ) { - logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); - const data = { - success: false, - document: null, - project_id: job.data.project_id, - error: - "URL is blocked. Suspecious activity detected. Please contact help@firecrawl.com if you believe this is an error.", - }; - return data; - } + // if ( + // job.data.url && + // (job.data.url.includes("researchhub.com") || + // job.data.url.includes("ebay.com")) + // ) { + // logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`); + // const data = { + // success: false, + // document: null, + // project_id: job.data.project_id, + // error: + // "URL is blocked. Suspecious activity detected. Please contact help@firecrawl.com if you believe this is an error.", + // }; + // return data; + // } try { job.updateProgress({