diff --git a/apps/api/src/__tests__/snips/scrape.test.ts b/apps/api/src/__tests__/snips/scrape.test.ts index a27057f7..156e8e15 100644 --- a/apps/api/src/__tests__/snips/scrape.test.ts +++ b/apps/api/src/__tests__/snips/scrape.test.ts @@ -77,24 +77,24 @@ describe("Scrape tests", () => { expect(JSON.stringify(status)).toBe(JSON.stringify(response)); }, 60000); - // describe("Ad blocking (f-e dependant)", () => { - // it.concurrent("blocks ads by default", async () => { - // const response = await scrape({ - // url: "https://www.allrecipes.com/recipe/18185/yum/", - // }); + describe("Ad blocking (f-e dependant)", () => { + it.concurrent("blocks ads by default", async () => { + const response = await scrape({ + url: "https://www.allrecipes.com/recipe/18185/yum/", + }); - // expect(response.markdown).not.toContain(".g.doubleclick.net/"); - // }, 30000); + expect(response.markdown).not.toContain(".g.doubleclick.net/"); + }, 30000); - // it.concurrent("doesn't block ads if explicitly disabled", async () => { - // const response = await scrape({ - // url: "https://www.allrecipes.com/recipe/18185/yum/", - // blockAds: false, - // }); + it.concurrent("doesn't block ads if explicitly disabled", async () => { + const response = await scrape({ + url: "https://www.allrecipes.com/recipe/18185/yum/", + blockAds: false, + }); - // expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//); - // }, 30000); - // }); + expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//); + }, 30000); + }); describe("Change Tracking format", () => { it.concurrent("works", async () => { diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts index 8bbacb4c..3e8490c4 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts @@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP( mobile: meta.options.mobile, timeout, // TODO: better timeout logic disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache, - blockAds: meta.options.blockAds, mobileProxy: meta.featureFlags.has("stealthProxy"), saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS, // TODO: scrollXPaths diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts index e0c44e18..5261b697 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts @@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = { headers?: { [K: string]: string }; blockMedia?: boolean; // default: true - blockAds?: boolean; // default: true // pageOptions?: any; // unused, .scrollXPaths is considered on FE side // useProxy?: boolean; // unused, default: true @@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = { blockMedia?: true; // cannot be false mobile?: boolean; disableSmartWaitCache?: boolean; - blockAds?: boolean; // default: true saveScrapeResultToGCS?: boolean; }; @@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = { engine: "tlsclient"; atsv?: boolean; // v0 only, default: false disableJsDom?: boolean; // v0 only, default: false - // blockAds?: boolean; // default: true }; const schema = z.object({ diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index fe0ae8c7..c690ac7b 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -69,6 +69,7 @@ export const featureFlags = [ "skipTlsVerification", "useFastMode", "stealthProxy", + "disableAdblock", ] as const; export type FeatureFlag = (typeof featureFlags)[number]; @@ -90,6 +91,7 @@ export const featureFlagOptions: { mobile: { priority: 10 }, skipTlsVerification: { priority: 10 }, stealthProxy: { priority: 20 }, + disableAdblock: { priority: 10 }, } as const; export type EngineScrapeResult = { @@ -160,6 +162,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: false, stealthProxy: false, + disableAdblock: false, }, quality: 1000, // cache should always be tried first }, @@ -177,6 +180,7 @@ export const engineOptions: { skipTlsVerification: true, useFastMode: false, stealthProxy: false, + disableAdblock: false, }, quality: 50, }, @@ -194,6 +198,7 @@ export const engineOptions: { skipTlsVerification: true, useFastMode: false, stealthProxy: false, + disableAdblock: false, }, quality: 45, }, @@ -211,6 +216,7 @@ export const engineOptions: { skipTlsVerification: true, useFastMode: false, stealthProxy: true, + disableAdblock: false, }, quality: -1, }, @@ -228,6 +234,7 @@ export const engineOptions: { skipTlsVerification: true, useFastMode: false, stealthProxy: true, + disableAdblock: false, }, quality: -5, }, @@ -245,6 +252,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: false, stealthProxy: false, + disableAdblock: true, }, quality: 40, }, @@ -262,6 +270,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: false, stealthProxy: true, + disableAdblock: true, }, quality: -10, }, @@ -279,6 +288,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: false, stealthProxy: false, + disableAdblock: false, }, quality: 20, }, @@ -296,6 +306,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: true, stealthProxy: false, + disableAdblock: false, }, quality: 10, }, @@ -313,6 +324,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: true, stealthProxy: true, + disableAdblock: false, }, quality: -15, }, @@ -330,6 +342,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: true, stealthProxy: false, + disableAdblock: false, }, quality: 5, }, @@ -347,6 +360,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: true, stealthProxy: true, // kinda... + disableAdblock: true, }, quality: -20, }, @@ -364,6 +378,7 @@ export const engineOptions: { skipTlsVerification: false, useFastMode: true, stealthProxy: true, // kinda... + disableAdblock: true, }, quality: -20, }, diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts index 90873ca6..2f926a96 100644 --- a/apps/api/src/scraper/scrapeURL/index.ts +++ b/apps/api/src/scraper/scrapeURL/index.ts @@ -118,6 +118,10 @@ function buildFeatureFlags( flags.add("docx"); } + if (options.blockAds === false) { + flags.add("disableAdblock"); + } + return flags; }