fix(scrapeURL): only allow disabling the adblock on playwright (FIR-2200) (#1616)

* fix(scrapeURL): only allow disabling the adblock on playwright

* feat(api/tests/scrape): re-enable ad blocking tests
This commit is contained in:
Gergő Móricz 2025-06-02 22:48:16 +02:00 committed by GitHub
parent 7a8be13220
commit 4167ec53eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 34 additions and 19 deletions

View File

@ -77,24 +77,24 @@ describe("Scrape tests", () => {
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
}, 60000);
// describe("Ad blocking (f-e dependant)", () => {
// it.concurrent("blocks ads by default", async () => {
// const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/",
// });
describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
});
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
// }, 30000);
expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 30000);
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
// const response = await scrape({
// url: "https://www.allrecipes.com/recipe/18185/yum/",
// blockAds: false,
// });
it.concurrent("doesn't block ads if explicitly disabled", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
blockAds: false,
});
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
// }, 30000);
// });
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
}, 30000);
});
describe("Change Tracking format", () => {
it.concurrent("works", async () => {

View File

@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
mobile: meta.options.mobile,
timeout, // TODO: better timeout logic
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
blockAds: meta.options.blockAds,
mobileProxy: meta.featureFlags.has("stealthProxy"),
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
// TODO: scrollXPaths

View File

@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = {
headers?: { [K: string]: string };
blockMedia?: boolean; // default: true
blockAds?: boolean; // default: true
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side
// useProxy?: boolean; // unused, default: true
@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = {
blockMedia?: true; // cannot be false
mobile?: boolean;
disableSmartWaitCache?: boolean;
blockAds?: boolean; // default: true
saveScrapeResultToGCS?: boolean;
};
@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = {
engine: "tlsclient";
atsv?: boolean; // v0 only, default: false
disableJsDom?: boolean; // v0 only, default: false
// blockAds?: boolean; // default: true
};
const schema = z.object({

View File

@ -69,6 +69,7 @@ export const featureFlags = [
"skipTlsVerification",
"useFastMode",
"stealthProxy",
"disableAdblock",
] as const;
export type FeatureFlag = (typeof featureFlags)[number];
@ -90,6 +91,7 @@ export const featureFlagOptions: {
mobile: { priority: 10 },
skipTlsVerification: { priority: 10 },
stealthProxy: { priority: 20 },
disableAdblock: { priority: 10 },
} as const;
export type EngineScrapeResult = {
@ -160,6 +162,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 1000, // cache should always be tried first
},
@ -177,6 +180,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 50,
},
@ -194,6 +198,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 45,
},
@ -211,6 +216,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: true,
disableAdblock: false,
},
quality: -1,
},
@ -228,6 +234,7 @@ export const engineOptions: {
skipTlsVerification: true,
useFastMode: false,
stealthProxy: true,
disableAdblock: false,
},
quality: -5,
},
@ -245,6 +252,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: true,
},
quality: 40,
},
@ -262,6 +270,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: true,
disableAdblock: true,
},
quality: -10,
},
@ -279,6 +288,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: false,
stealthProxy: false,
disableAdblock: false,
},
quality: 20,
},
@ -296,6 +306,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: false,
disableAdblock: false,
},
quality: 10,
},
@ -313,6 +324,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true,
disableAdblock: false,
},
quality: -15,
},
@ -330,6 +342,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: false,
disableAdblock: false,
},
quality: 5,
},
@ -347,6 +360,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true, // kinda...
disableAdblock: true,
},
quality: -20,
},
@ -364,6 +378,7 @@ export const engineOptions: {
skipTlsVerification: false,
useFastMode: true,
stealthProxy: true, // kinda...
disableAdblock: true,
},
quality: -20,
},

View File

@ -118,6 +118,10 @@ function buildFeatureFlags(
flags.add("docx");
}
if (options.blockAds === false) {
flags.add("disableAdblock");
}
return flags;
}