fix(scrapeURL): only allow disabling the adblock on playwright (FIR-2200) (#1616)

* fix(scrapeURL): only allow disabling the adblock on playwright

* feat(api/tests/scrape): re-enable ad blocking tests
This commit is contained in:
Gergő Móricz 2025-06-02 22:48:16 +02:00 committed by GitHub
parent 7a8be13220
commit 4167ec53eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 34 additions and 19 deletions

View File

@ -77,24 +77,24 @@ describe("Scrape tests", () => {
expect(JSON.stringify(status)).toBe(JSON.stringify(response)); expect(JSON.stringify(status)).toBe(JSON.stringify(response));
}, 60000); }, 60000);
// describe("Ad blocking (f-e dependant)", () => { describe("Ad blocking (f-e dependant)", () => {
// it.concurrent("blocks ads by default", async () => { it.concurrent("blocks ads by default", async () => {
// const response = await scrape({ const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/", url: "https://www.allrecipes.com/recipe/18185/yum/",
// }); });
// expect(response.markdown).not.toContain(".g.doubleclick.net/"); expect(response.markdown).not.toContain(".g.doubleclick.net/");
// }, 30000); }, 30000);
// it.concurrent("doesn't block ads if explicitly disabled", async () => { it.concurrent("doesn't block ads if explicitly disabled", async () => {
// const response = await scrape({ const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/", url: "https://www.allrecipes.com/recipe/18185/yum/",
// blockAds: false, blockAds: false,
// }); });
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//); expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
// }, 30000); }, 30000);
// }); });
describe("Change Tracking format", () => { describe("Change Tracking format", () => {
it.concurrent("works", async () => { it.concurrent("works", async () => {

View File

@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
mobile: meta.options.mobile, mobile: meta.options.mobile,
timeout, // TODO: better timeout logic timeout, // TODO: better timeout logic
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache, disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
blockAds: meta.options.blockAds,
mobileProxy: meta.featureFlags.has("stealthProxy"), mobileProxy: meta.featureFlags.has("stealthProxy"),
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS, saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
// TODO: scrollXPaths // TODO: scrollXPaths

View File

@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = {
headers?: { [K: string]: string }; headers?: { [K: string]: string };
blockMedia?: boolean; // default: true blockMedia?: boolean; // default: true
blockAds?: boolean; // default: true
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side // pageOptions?: any; // unused, .scrollXPaths is considered on FE side
// useProxy?: boolean; // unused, default: true // useProxy?: boolean; // unused, default: true
@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = {
blockMedia?: true; // cannot be false blockMedia?: true; // cannot be false
mobile?: boolean; mobile?: boolean;
disableSmartWaitCache?: boolean; disableSmartWaitCache?: boolean;
blockAds?: boolean; // default: true
saveScrapeResultToGCS?: boolean; saveScrapeResultToGCS?: boolean;
}; };
@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = {
engine: "tlsclient"; engine: "tlsclient";
atsv?: boolean; // v0 only, default: false atsv?: boolean; // v0 only, default: false
disableJsDom?: boolean; // v0 only, default: false disableJsDom?: boolean; // v0 only, default: false
// blockAds?: boolean; // default: true
}; };
const schema = z.object({ const schema = z.object({

View File

@ -69,6 +69,7 @@ export const featureFlags = [
"skipTlsVerification", "skipTlsVerification",
"useFastMode", "useFastMode",
"stealthProxy", "stealthProxy",
"disableAdblock",
] as const; ] as const;
export type FeatureFlag = (typeof featureFlags)[number]; export type FeatureFlag = (typeof featureFlags)[number];
@ -90,6 +91,7 @@ export const featureFlagOptions: {
mobile: { priority: 10 }, mobile: { priority: 10 },
skipTlsVerification: { priority: 10 }, skipTlsVerification: { priority: 10 },
stealthProxy: { priority: 20 }, stealthProxy: { priority: 20 },
disableAdblock: { priority: 10 },
} as const; } as const;
export type EngineScrapeResult = { export type EngineScrapeResult = {
@ -160,6 +162,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: false, useFastMode: false,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 1000, // cache should always be tried first quality: 1000, // cache should always be tried first
}, },
@ -177,6 +180,7 @@ export const engineOptions: {
skipTlsVerification: true, skipTlsVerification: true,
useFastMode: false, useFastMode: false,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 50, quality: 50,
}, },
@ -194,6 +198,7 @@ export const engineOptions: {
skipTlsVerification: true, skipTlsVerification: true,
useFastMode: false, useFastMode: false,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 45, quality: 45,
}, },
@ -211,6 +216,7 @@ export const engineOptions: {
skipTlsVerification: true, skipTlsVerification: true,
useFastMode: false, useFastMode: false,
stealthProxy: true, stealthProxy: true,
disableAdblock: false,
}, },
quality: -1, quality: -1,
}, },
@ -228,6 +234,7 @@ export const engineOptions: {
skipTlsVerification: true, skipTlsVerification: true,
useFastMode: false, useFastMode: false,
stealthProxy: true, stealthProxy: true,
disableAdblock: false,
}, },
quality: -5, quality: -5,
}, },
@ -245,6 +252,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: false, useFastMode: false,
stealthProxy: false, stealthProxy: false,
disableAdblock: true,
}, },
quality: 40, quality: 40,
}, },
@ -262,6 +270,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: false, useFastMode: false,
stealthProxy: true, stealthProxy: true,
disableAdblock: true,
}, },
quality: -10, quality: -10,
}, },
@ -279,6 +288,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: false, useFastMode: false,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 20, quality: 20,
}, },
@ -296,6 +306,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: true, useFastMode: true,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 10, quality: 10,
}, },
@ -313,6 +324,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: true, useFastMode: true,
stealthProxy: true, stealthProxy: true,
disableAdblock: false,
}, },
quality: -15, quality: -15,
}, },
@ -330,6 +342,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: true, useFastMode: true,
stealthProxy: false, stealthProxy: false,
disableAdblock: false,
}, },
quality: 5, quality: 5,
}, },
@ -347,6 +360,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: true, useFastMode: true,
stealthProxy: true, // kinda... stealthProxy: true, // kinda...
disableAdblock: true,
}, },
quality: -20, quality: -20,
}, },
@ -364,6 +378,7 @@ export const engineOptions: {
skipTlsVerification: false, skipTlsVerification: false,
useFastMode: true, useFastMode: true,
stealthProxy: true, // kinda... stealthProxy: true, // kinda...
disableAdblock: true,
}, },
quality: -20, quality: -20,
}, },

View File

@ -118,6 +118,10 @@ function buildFeatureFlags(
flags.add("docx"); flags.add("docx");
} }
if (options.blockAds === false) {
flags.add("disableAdblock");
}
return flags; return flags;
} }