From 24e8aaf6b5a302b0accd1473de0e66f2f68b35a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= <ademilsonft@outlook.com>
Date: Fri, 31 Jan 2025 13:14:48 +0000
Subject: [PATCH 1/2] feat(blocklist): Improve URL blocking with tldts parsing

---
 apps/api/package.json                         |   3 +-
 apps/api/pnpm-lock.yaml                       |  16 +++
 .../utils/__tests__/blocklist.test.ts         | 131 ++++++------------
 .../src/scraper/WebScraper/utils/blocklist.ts |  76 ++++++----
 4 files changed, 111 insertions(+), 115 deletions(-)

diff --git a/apps/api/package.json b/apps/api/package.json
index 8c92746f..1c554728 100644
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -121,6 +121,7 @@
     "scrapingbee": "^1.7.4",
     "stripe": "^16.1.0",
     "systeminformation": "^5.22.11",
+    "tldts": "^6.1.75",
     "turndown": "^7.1.3",
     "turndown-plugin-gfm": "^1.0.2",
     "typesense": "^1.5.4",
@@ -142,4 +143,4 @@
       "temp"
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml
index 7478cef9..f66dfe72 100644
--- a/apps/api/pnpm-lock.yaml
+++ b/apps/api/pnpm-lock.yaml
@@ -203,6 +203,9 @@ importers:
       systeminformation:
         specifier: ^5.22.11
         version: 5.22.11
+      tldts:
+        specifier: ^6.1.75
+        version: 6.1.75
       turndown:
         specifier: ^7.1.3
         version: 7.2.0
@@ -4286,6 +4289,13 @@ packages:
   through@2.3.8:
     resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
 
+  tldts-core@6.1.75:
+    resolution: {integrity: sha512-AOvV5YYIAFFBfransBzSTyztkc3IMfz5Eq3YluaRiEu55nn43Fzaufx70UqEKYr8BoLCach4q8g/bg6e5+/aFw==}
+
+  tldts@6.1.75:
+    resolution: {integrity: sha512-+lFzEXhpl7JXgWYaXcB6DqTYXbUArvrWAE/5ioq/X3CdWLbDjpPP4XTrQBmEJ91y3xbe4Fkw7Lxv4P3GWeJaNg==}
+    hasBin: true
+
   tmpl@1.0.5:
     resolution: {integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==}
 
@@ -9612,6 +9622,12 @@ snapshots:
 
   through@2.3.8: {}
 
+  tldts-core@6.1.75: {}
+
+  tldts@6.1.75:
+    dependencies:
+      tldts-core: 6.1.75
+
   tmpl@1.0.5: {}
 
   to-fast-properties@2.0.0: {}
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
index d3963685..8144f2c8 100644
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
+++ b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
@@ -1,94 +1,53 @@
 import { isUrlBlocked } from "../blocklist";
 
-describe("Blocklist Functionality", () => {
-  describe("isUrlBlocked", () => {
-    test.each([
-      "https://facebook.com/fake-test",
-      "https://x.com/user-profile",
-      "https://twitter.com/home",
-      "https://instagram.com/explore",
-      "https://linkedin.com/in/johndoe",
-      "https://snapchat.com/add/johndoe",
-      "https://tiktok.com/@johndoe",
-      "https://reddit.com/r/funny",
-      "https://tumblr.com/dashboard",
-      "https://flickr.com/photos/johndoe",
-      "https://whatsapp.com/download",
-      "https://wechat.com/features",
-      "https://telegram.org/apps",
-    ])("should return true for blocklisted URL %s", (url) => {
-      expect(isUrlBlocked(url)).toBe(true);
-    });
-
-    test.each([
-      "https://facebook.com/policy",
-      "https://twitter.com/tos",
-      "https://instagram.com/about/legal/terms",
-      "https://linkedin.com/legal/privacy-policy",
-      "https://pinterest.com/about/privacy",
-      "https://snapchat.com/legal/terms",
-      "https://tiktok.com/legal/privacy-policy",
-      "https://reddit.com/policies",
-      "https://tumblr.com/policy/en/privacy",
-      "https://flickr.com/help/terms",
-      "https://whatsapp.com/legal",
-      "https://wechat.com/en/privacy-policy",
-      "https://telegram.org/tos",
-    ])("should return false for allowed URLs with keywords %s", (url) => {
-      expect(isUrlBlocked(url)).toBe(false);
-    });
-
-    test("should return false for non-blocklisted domain", () => {
-      const url = "https://example.com";
-      expect(isUrlBlocked(url)).toBe(false);
-    });
-
-    test("should handle invalid URLs gracefully", () => {
-      const url = "htp://invalid-url";
-      expect(isUrlBlocked(url)).toBe(false);
-    });
+describe("isUrlBlocked function", () => {
+  test("Blocks exact domain facebook.com", () => {
+    expect(isUrlBlocked("facebook.com")).toBe(true);
+    expect(isUrlBlocked("http://facebook.com")).toBe(true);
+    expect(isUrlBlocked("https://facebook.com")).toBe(true);
   });
 
-  test.each([
-    "https://subdomain.facebook.com",
-    "https://facebook.com.someotherdomain.com",
-    "https://www.facebook.com/profile",
-    "https://api.twitter.com/info",
-    "https://instagram.com/accounts/login",
-  ])(
-    "should return true for URLs with blocklisted domains in subdomains or paths %s",
-    (url) => {
-      expect(isUrlBlocked(url)).toBe(true);
-    },
-  );
-
-  test.each([
-    "https://example.com/facebook.com",
-    "https://example.com/redirect?url=https://twitter.com",
-    "https://facebook.com.policy.example.com",
-  ])(
-    "should return false for URLs where blocklisted domain is part of another domain or path %s",
-    (url) => {
-      expect(isUrlBlocked(url)).toBe(false);
-    },
-  );
-
-  test.each(["https://FACEBOOK.com", "https://INSTAGRAM.com/@something"])(
-    "should handle case variations %s",
-    (url) => {
-      expect(isUrlBlocked(url)).toBe(true);
-    },
-  );
-
-  test.each([
-    "https://facebook.com?redirect=https://example.com",
-    "https://twitter.com?query=something",
-  ])("should handle query parameters %s", (url) => {
-    expect(isUrlBlocked(url)).toBe(true);
+  test("Blocks subdomains of facebook.com", () => {
+    expect(isUrlBlocked("www.facebook.com")).toBe(true);
+    expect(isUrlBlocked("ads.facebook.com")).toBe(true);
+    expect(isUrlBlocked("business.facebook.com")).toBe(true);
   });
 
-  test("should handle internationalized domain names", () => {
-    const url = "https://xn--d1acpjx3f.xn--p1ai";
-    expect(isUrlBlocked(url)).toBe(false);
+  test("Blocks different TLDs (facebook.pt, facebook.io)", () => {
+    expect(isUrlBlocked("facebook.pt")).toBe(true);
+    expect(isUrlBlocked("facebook.io")).toBe(true);
+    expect(isUrlBlocked("facebook.co.uk")).toBe(true);
+    expect(isUrlBlocked("https://facebook.de")).toBe(true);
+  });
+
+  test("Allows unrelated domains like whateverfacebook.com", () => {
+    expect(isUrlBlocked("whateverfacebook.com")).toBe(false);
+    expect(isUrlBlocked("https://whateverfacebook.com")).toBe(false);
+  });
+
+  test("Blocks other domains from the blocklist", () => {
+    expect(isUrlBlocked("tiktok.com")).toBe(true);
+    expect(isUrlBlocked("www.tiktok.com")).toBe(true);
+    expect(isUrlBlocked("reddit.com")).toBe(true);
+    expect(isUrlBlocked("youtube.com")).toBe(true);
+  });
+
+  test("Allows allowed keywords URLs", () => {
+    expect(isUrlBlocked("https://www.facebook.com/ads/library")).toBe(false);
+    expect(isUrlBlocked("https://developers.facebook.com")).toBe(false);
+    expect(isUrlBlocked("https://library.tiktok.com")).toBe(false);
+  });
+
+  test("Handles URLs with and without protocols", () => {
+    expect(isUrlBlocked("facebook.com")).toBe(true);
+    expect(isUrlBlocked("http://facebook.com")).toBe(true);
+    expect(isUrlBlocked("https://facebook.com")).toBe(true);
+    expect(isUrlBlocked("www.facebook.com")).toBe(true);
+  });
+
+  test("Should return false if the URL is invalid", () => {
+    expect(isUrlBlocked("randomstring")).toBe(false);
+    expect(isUrlBlocked("htp://bad.url")).toBe(false);
+    expect(isUrlBlocked("")).toBe(false);
   });
 });
diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index 691af596..f1e28892 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -1,6 +1,7 @@
-import { logger } from "../../../lib/logger";
-import crypto from "crypto";
 import { configDotenv } from "dotenv";
+import crypto from "crypto";
+import { parse } from "tldts";
+
 configDotenv();
 
 const hashKey = Buffer.from(process.env.HASH_KEY || "", "utf-8");
@@ -66,7 +67,7 @@ const urlBlocklist = [
   "KKttwRz4w+AMJrZcB828WQ==",
   "vMdzZ33BXoyWVZnAPOBcrg==",
   "l8GDVI8w/ueHnNzdN1ODuQ==",
-  "+yz9bnYYMnC0trJZGJwf6Q=="
+  "+yz9bnYYMnC0trJZGJwf6Q==",
 ];
 
 const decryptedBlocklist =
@@ -100,38 +101,57 @@ const allowedKeywords = [
 ];
 
 export function isUrlBlocked(url: string): boolean {
-  const lowerCaseUrl = url.toLowerCase();
+  const lowerCaseUrl = url.trim().toLowerCase();
 
-  // Check if the URL contains any allowed keywords as whole words
+  const decryptedUrl =
+    decryptedBlocklist.find((decrypted) => lowerCaseUrl === decrypted) ||
+    lowerCaseUrl;
+
+  // If the URL is empty or invalid, return false
+  let parsedUrl;
+  try {
+    parsedUrl = parse(decryptedUrl);
+  } catch {
+    console.log("Error parsing URL:", url);
+    return false;
+  }
+
+  const domain = parsedUrl.domain;
+  const publicSuffix = parsedUrl.publicSuffix;
+
+  if (!domain) {
+    return false;
+  }
+
+  // Check if URL contains any allowed keyword
   if (
     allowedKeywords.some((keyword) =>
-      new RegExp(`\\b${keyword}\\b`, "i").test(lowerCaseUrl),
+      lowerCaseUrl.includes(keyword.toLowerCase()),
     )
   ) {
     return false;
   }
 
-  try {
-    if (!url.startsWith("http://") && !url.startsWith("https://")) {
-      url = "https://" + url;
-    }
-
-    const urlObj = new URL(url);
-    const hostname = urlObj.hostname.toLowerCase();
-
-    // Check if the URL matches any domain in the blocklist
-    const isBlocked = decryptedBlocklist.some((domain) => {
-      const domainPattern = new RegExp(
-        `(^|\\.)${domain.replace(".", "\\.")}(\\.|$)`,
-        "i",
-      );
-      return domainPattern.test(hostname);
-    });
-
-    return isBlocked;
-  } catch (e) {
-    // If an error occurs (e.g., invalid URL), return false
-    logger.error(`Error parsing the following URL: ${url}`);
-    return false;
+  // Block exact matches
+  if (decryptedBlocklist.includes(domain)) {
+    return true;
   }
+
+  // Block subdomains
+  if (decryptedBlocklist.some((blocked) => domain.endsWith(`.${blocked}`))) {
+    return true;
+  }
+
+  // Block different TLDs of the same base domain
+  const baseDomain = domain.split(".")[0]; // Extract the base domain (e.g., "facebook" from "facebook.com")
+  if (
+    publicSuffix &&
+    decryptedBlocklist.some(
+      (blocked) => blocked.startsWith(baseDomain) && blocked !== domain,
+    )
+  ) {
+    return true;
+  }
+
+  return false;
 }

From bcd74498e3d83f833933704abd55295bbc6c160b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= <ademilsonft@outlook.com>
Date: Fri, 31 Jan 2025 18:23:38 +0000
Subject: [PATCH 2/2] test(blocklist): Enhance URL blocking test coverage with
 decrypted domain validation

---
 apps/api/src/lib/extract/build-prompts.ts     |   2 +-
 .../utils/__tests__/blocklist.test.ts         | 162 ++++++++++++++----
 .../src/scraper/WebScraper/utils/blocklist.ts |  31 ++--
 3 files changed, 150 insertions(+), 45 deletions(-)

diff --git a/apps/api/src/lib/extract/build-prompts.ts b/apps/api/src/lib/extract/build-prompts.ts
index 6d89eb2f..24710660 100644
--- a/apps/api/src/lib/extract/build-prompts.ts
+++ b/apps/api/src/lib/extract/build-prompts.ts
@@ -40,7 +40,7 @@ to determine their relevance to the user's query and intent.
 }
 
 export function buildRerankerUserPrompt(searchQuery: string): string {
-  return `Given these URLs and their content, identify which ones are relevant to the user's extraction request: "${searchQuery}". Return an array of relevant links with their relevance scores (0-1). Higher scores should be given to URLs that directly address the user's extraction request. Be very mindful with the links you select, as if they are not that relevant it may affect the quality of the extraction. Only include URLs that have a relvancy score of 0.6+.`;
+  return `Given these URLs and their content, identify which ones are relevant to the user's extraction request: "${searchQuery}". Return an array of relevant links with their relevance scores (0-1). Higher scores should be given to URLs that directly address the user's extraction request. Be very mindful with the links you select, as if they are not that relevant it may affect the quality of the extraction. Only include URLs that have a relevancy score of 0.6+.`;
 }
 
 // Multi entity schema anlayzer
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
index 8144f2c8..cbba98e1 100644
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
+++ b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
@@ -1,23 +1,111 @@
-import { isUrlBlocked } from "../blocklist";
+import { decryptAES, isUrlBlocked } from "../blocklist";
+
+const hashKey = Buffer.from(process.env.HASH_KEY || "", "utf-8");
 
 describe("isUrlBlocked function", () => {
-  test("Blocks exact domain facebook.com", () => {
-    expect(isUrlBlocked("facebook.com")).toBe(true);
-    expect(isUrlBlocked("http://facebook.com")).toBe(true);
-    expect(isUrlBlocked("https://facebook.com")).toBe(true);
+  beforeAll(() => {
+    // Mock the decryptedBlocklist function to return known values
+    jest
+      .spyOn(require("../blocklist"), "decryptedBlocklist")
+      .mockReturnValue([
+        "h8ngAFXUNLO3ZqQufJjGVA==",
+        "fEGiDm/TWDBkXUXejFVICg==",
+        "l6Mei7IGbEmTTFoSudUnqQ==",
+        "4OjallJzXRiZUAWDiC2Xww==",
+        "ReSvkSfx34TNEdecmmSDdQ==",
+        "X1E4WtdmXAv3SAX9xN925Q==",
+        "VTzBQfMtXZzM05mnNkWkjA==",
+        "m/q4Lb2Z8cxwU7/CoztOFg==",
+        "UbVnmRaeG+gKcyVDLAm0vg==",
+        "xNQhczYG22tTVc6lYE3qwg==",
+        "CQfGDydbg4l1swRCru6O6Q==",
+        "l86LQxm2NonTWMauXwEsPw==",
+        "6v4QDUcwjnID80G+uU+tgw==",
+        "pCF/6nrKZAxaYntzEGluZQ==",
+        "r0CRhAmQqSe7V2s3073T00sAh4WcS5779jwuGJ26ows==",
+        "aBOVqRFBM4UVg33usY10NdiF0HCnFH/ImtD0n+zIpc8==",
+        "QV436UZuQ6D0Dqrx9MwaGw==",
+        "OYVvrwILYbzA2mSSqOPPpw==",
+        "xW2i4C0Dzcnp+qu12u0SAw==",
+        "OLHba209l0dfl0MI4EnQonBITK9z8Qwgd/NsuaTkXmA=",
+        "X0VynmNjpL3PrYxpUIG7sFMBt8OlrmQWtxj8oXVu2QM=",
+        "ObdlM5NEkvBJ/sojRW5K/Q==",
+        "C8Th38X0SjsE1vL/OsD8bA==",
+        "PTbGg8PK/h0Seyw4HEpK4Q==",
+        "lZdQMknjHb7+4+sjF3qNTw==",
+        "LsgSq54q5oDysbva29JxnQ==",
+        "KZfBtpwjOpdSoqacRbz7og==",
+        "Indtl4yxJMHCKBGF4KABCQ==",
+        "e3HFXLVgxhaVoadYpwb2BA==",
+        "b+asgLayXQ5Jq+se+q56jA==",
+        "86ZDUI7vmp4MvNq3fvZrGQ==",
+        "sEGFoYZ6GEg4Zocd+TiyfQ==",
+        "6OOL72eXthgnJ1Hj4PfOQQ==",
+        "g/ME+Sh1CAFboKrwkVb+5Q==",
+        "Pw+xawUoX8xBYbX2yqqGWQ==",
+        "k6vBalxYFhAvkPsF19t9gQ==",
+        "b+asgLayXQ5Jq+se+q56jA==",
+        "KKttwRz4w+AMJrZcB828WQ==",
+        "vMdzZ33BXoyWVZnAPOBcrg==",
+        "l8GDVI8w/ueHnNzdN1ODuQ==",
+        "+yz9bnYYMnC0trJZGJwf6Q==",
+      ]);
   });
 
-  test("Blocks subdomains of facebook.com", () => {
-    expect(isUrlBlocked("www.facebook.com")).toBe(true);
-    expect(isUrlBlocked("ads.facebook.com")).toBe(true);
-    expect(isUrlBlocked("business.facebook.com")).toBe(true);
+  test("Blocks exact domain with and without protocol", () => {
+    expect(isUrlBlocked(decryptAES("KZfBtpwjOpdSoqacRbz7og==", hashKey))).toBe(
+      true,
+    );
+    expect(
+      isUrlBlocked(
+        decryptAES("TemsdmaA9kBK9cVJTaAmZksAh4WcS5779jwuGJ26ows=", hashKey),
+      ),
+    ).toBe(true);
+    expect(
+      isUrlBlocked(
+        decryptAES("0pCVMPgc7+IMrLjIA5lFV0ttO4rKIA14yZBb+2FDG7I=", hashKey),
+      ),
+    ).toBe(true);
+    expect(
+      isUrlBlocked(
+        decryptAES("m+PjIWE9E4GF3lA/B9cUMDj3smbHhZYOGxP74UTmd3M=", hashKey),
+      ),
+    ).toBe(true);
   });
 
-  test("Blocks different TLDs (facebook.pt, facebook.io)", () => {
-    expect(isUrlBlocked("facebook.pt")).toBe(true);
-    expect(isUrlBlocked("facebook.io")).toBe(true);
-    expect(isUrlBlocked("facebook.co.uk")).toBe(true);
-    expect(isUrlBlocked("https://facebook.de")).toBe(true);
+  test("Blocks subdomains of a blocked domain", () => {
+    expect(
+      isUrlBlocked(
+        decryptAES("m+PjIWE9E4GF3lA/B9cUMDj3smbHhZYOGxP74UTmd3M=", hashKey),
+      ),
+    ).toBe(true);
+    expect(
+      isUrlBlocked(
+        decryptAES("o/ClKrW6Qo0uidbD2X8cVjj3smbHhZYOGxP74UTmd3M=", hashKey),
+      ),
+    ).toBe(true);
+    expect(
+      isUrlBlocked(
+        decryptAES("Z53Ny7rvn7cBX/2bYpOZrRDosKfU7BiSM0OClb4bdWY=", hashKey),
+      ),
+    ).toBe(true);
+  });
+
+  test("Blocks different TLDs (BLOCKED-DOMAIN.pt, BLOCKED-DOMAIN.io)", () => {
+    expect(isUrlBlocked(decryptAES("vUMeqQdqk7ajwczYBr6prA==", hashKey))).toBe(
+      true,
+    );
+    expect(isUrlBlocked(decryptAES("WOjW9VwGwrPu846jDo6VQg==", hashKey))).toBe(
+      true,
+    );
+    expect(isUrlBlocked(decryptAES("Ti3vVa6sRew3wyTZ7a/Yag==", hashKey))).toBe(
+      true,
+    );
+    expect(
+      isUrlBlocked(
+        decryptAES("0pCVMPgc7+IMrLjIA5lFV5cYWcOWC5LGWwvlbCW2GH4=", hashKey),
+      ),
+    ).toBe(true);
   });
 
   test("Allows unrelated domains like whateverfacebook.com", () => {
@@ -26,23 +114,39 @@ describe("isUrlBlocked function", () => {
   });
 
   test("Blocks other domains from the blocklist", () => {
-    expect(isUrlBlocked("tiktok.com")).toBe(true);
-    expect(isUrlBlocked("www.tiktok.com")).toBe(true);
-    expect(isUrlBlocked("reddit.com")).toBe(true);
-    expect(isUrlBlocked("youtube.com")).toBe(true);
+    expect(isUrlBlocked(decryptAES("e3HFXLVgxhaVoadYpwb2BA==", hashKey))).toBe(
+      true,
+    );
+    expect(isUrlBlocked(decryptAES("XS61fAjZb5JfAWsyzzOoCQ==", hashKey))).toBe(
+      true,
+    );
+    expect(isUrlBlocked(decryptAES("Indtl4yxJMHCKBGF4KABCQ==", hashKey))).toBe(
+      true,
+    );
+    expect(isUrlBlocked(decryptAES("86ZDUI7vmp4MvNq3fvZrGQ==", hashKey))).toBe(
+      true,
+    );
   });
 
-  test("Allows allowed keywords URLs", () => {
-    expect(isUrlBlocked("https://www.facebook.com/ads/library")).toBe(false);
-    expect(isUrlBlocked("https://developers.facebook.com")).toBe(false);
-    expect(isUrlBlocked("https://library.tiktok.com")).toBe(false);
-  });
-
-  test("Handles URLs with and without protocols", () => {
-    expect(isUrlBlocked("facebook.com")).toBe(true);
-    expect(isUrlBlocked("http://facebook.com")).toBe(true);
-    expect(isUrlBlocked("https://facebook.com")).toBe(true);
-    expect(isUrlBlocked("www.facebook.com")).toBe(true);
+  test("Allows allowed keywords URLs [developers.*, library.*, ads.*]", () => {
+    expect(
+      isUrlBlocked(
+        decryptAES(
+          "4H7Uyz6sSCwE3mne1SsGU+6gs7VssjM3e5C6qsyUPUnhsthhQp2bAQwZ9xSCJsjB",
+          hashKey,
+        ),
+      ),
+    ).toBe(false);
+    expect(
+      isUrlBlocked(
+        decryptAES("rNA7JWR/voEnzAqpC4QJAYgZUratpaNBCBVujdFqDb0=", hashKey),
+      ),
+    ).toBe(false);
+    expect(
+      isUrlBlocked(
+        decryptAES("ipHiDz83ep6vbIMee94+4XtxxVy1YMYWlaGnWKcG9gQ=", hashKey),
+      ),
+    ).toBe(false);
   });
 
   test("Should return false if the URL is invalid", () => {
diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index f1e28892..71d1f454 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -7,7 +7,7 @@ configDotenv();
 const hashKey = Buffer.from(process.env.HASH_KEY || "", "utf-8");
 const algorithm = "aes-256-ecb";
 
-function encryptAES(plaintext: string, key: Buffer): string {
+export function encryptAES(plaintext: string, key: Buffer): string {
   const cipher = crypto.createCipheriv(algorithm, key, null);
   const encrypted = Buffer.concat([
     cipher.update(plaintext, "utf-8"),
@@ -16,7 +16,7 @@ function encryptAES(plaintext: string, key: Buffer): string {
   return encrypted.toString("base64");
 }
 
-function decryptAES(ciphertext: string, key: Buffer): string {
+export function decryptAES(ciphertext: string, key: Buffer): string {
   const decipher = crypto.createDecipheriv(algorithm, key, null);
   const decrypted = Buffer.concat([
     decipher.update(Buffer.from(ciphertext, "base64")),
@@ -62,18 +62,12 @@ const urlBlocklist = [
   "g/ME+Sh1CAFboKrwkVb+5Q==",
   "Pw+xawUoX8xBYbX2yqqGWQ==",
   "k6vBalxYFhAvkPsF19t9gQ==",
-  "e3HFXLVgxhaVoadYpwb2BA==",
   "b+asgLayXQ5Jq+se+q56jA==",
   "KKttwRz4w+AMJrZcB828WQ==",
   "vMdzZ33BXoyWVZnAPOBcrg==",
   "l8GDVI8w/ueHnNzdN1ODuQ==",
   "+yz9bnYYMnC0trJZGJwf6Q==",
-];
-
-const decryptedBlocklist =
-  hashKey.length > 0
-    ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey))
-    : [];
+]
 
 const allowedKeywords = [
   "pulse",
@@ -100,15 +94,22 @@ const allowedKeywords = [
   "://www.facebook.com/ads/library",
 ];
 
+export function decryptedBlocklist(list: string[]): string[] { // exported: blocklist.test.ts spies on this name via jest.spyOn
+  return hashKey.length > 0
+    ? list.map((ciphertext) => decryptAES(ciphertext, hashKey))
+    : []; // empty HASH_KEY: skip decryption and return an empty list
+}
+
 export function isUrlBlocked(url: string): boolean {
   const lowerCaseUrl = url.trim().toLowerCase();
-
+  
+  const blockedlist = decryptedBlocklist(urlBlocklist);
   const decryptedUrl =
-    decryptedBlocklist.find((decrypted) => lowerCaseUrl === decrypted) ||
+    blockedlist.find((decrypted) => lowerCaseUrl === decrypted) ||
     lowerCaseUrl;
 
   // If the URL is empty or invalid, return false
-  let parsedUrl;
+  let parsedUrl: any;
   try {
     parsedUrl = parse(decryptedUrl);
   } catch {
@@ -133,12 +134,12 @@ export function isUrlBlocked(url: string): boolean {
   }
 
   // Block exact matches
-  if (decryptedBlocklist.includes(domain)) {
+  if (blockedlist.includes(domain)) {
     return true;
   }
 
   // Block subdomains
-  if (decryptedBlocklist.some((blocked) => domain.endsWith(`.${blocked}`))) {
+  if (blockedlist.some((blocked) => domain.endsWith(`.${blocked}`))) {
     return true;
   }
 
@@ -146,7 +147,7 @@ export function isUrlBlocked(url: string): boolean {
   const baseDomain = domain.split(".")[0]; // Extract the base domain (e.g., "facebook" from "facebook.com")
   if (
     publicSuffix &&
-    decryptedBlocklist.some(
+    blockedlist.some(
       (blocked) => blocked.startsWith(baseDomain) && blocked !== domain,
     )
   ) {