feat(scrapeUrl/fire-engine): add blockAds flag (FIR-692) (#1106)

* feat(scrapeUrl/fire-engine): add blockAds flag

* feat(v1/scrape): blockAds test
This commit is contained in:
Gergő Móricz 2025-01-29 15:03:37 +01:00 committed by GitHub
parent 5733b82e9d
commit d09e0603f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 28 additions and 2 deletions

View File

@ -36,7 +36,28 @@ describe("Scrape tests", () => {
); );
}); });
describe("Location API", () => { describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
  // No blockAds option is sent here, so the request exercises the default behaviour.
  const result = await scrape({ url: "https://canyoublockit.com/testing/" });

  expectScrapeToSucceed(result);

  // With ads blocked, the scraped markdown must not mention the DoubleClick host.
  const scrapedMarkdown = result.body.data.markdown;
  expect(scrapedMarkdown).not.toContain(".g.doubleclick.net/");
}, 10000);
it.concurrent("doesn't block ads if explicitly disabled", async () => {
  // Explicitly opt out of ad blocking for this request.
  const result = await scrape({
    blockAds: false,
    url: "https://canyoublockit.com/testing/",
  });

  expectScrapeToSucceed(result);

  // With blocking turned off, the DoubleClick host is expected to appear in the markdown.
  const scrapedMarkdown = result.body.data.markdown;
  expect(scrapedMarkdown).toContain(".g.doubleclick.net/");
}, 10000);
});
describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => { it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({ const response = await scrape({
url: "https://iplocation.com", url: "https://iplocation.com",
@ -54,5 +75,5 @@ describe("Scrape tests", () => {
expectScrapeToSucceed(response); expectScrapeToSucceed(response);
expect(response.body.data.markdown).toContain("| Country | United States |"); expect(response.body.data.markdown).toContain("| Country | United States |");
}); });
}) });
}); });

View File

@ -186,6 +186,7 @@ export const scrapeOptions = z
removeBase64Images: z.boolean().default(true), removeBase64Images: z.boolean().default(true),
fastMode: z.boolean().default(false), fastMode: z.boolean().default(false),
useMock: z.string().optional(), useMock: z.string().optional(),
blockAds: z.boolean().default(true),
}) })
.strict(strictMessage); .strict(strictMessage);

View File

@ -197,6 +197,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
mobile: meta.options.mobile, mobile: meta.options.mobile,
timeout, // TODO: better timeout logic timeout, // TODO: better timeout logic
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache, disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
blockAds: meta.options.blockAds,
// TODO: scrollXPaths // TODO: scrollXPaths
}; };
@ -271,6 +272,7 @@ export async function scrapeURLWithFireEnginePlaywright(
fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"), fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"),
wait: meta.options.waitFor, wait: meta.options.waitFor,
geolocation: meta.options.geolocation ?? meta.options.location, geolocation: meta.options.geolocation ?? meta.options.location,
blockAds: meta.options.blockAds,
timeout, timeout,
}; };

View File

@ -37,6 +37,7 @@ export type FireEngineScrapeRequestChromeCDP = {
blockMedia?: true; // cannot be false blockMedia?: true; // cannot be false
mobile?: boolean; mobile?: boolean;
disableSmartWaitCache?: boolean; disableSmartWaitCache?: boolean;
blockAds?: boolean; // default: true
}; };
export type FireEngineScrapeRequestPlaywright = { export type FireEngineScrapeRequestPlaywright = {

View File

@ -93,6 +93,7 @@ export interface CrawlScrapeOptions {
mobile?: boolean; mobile?: boolean;
skipTlsVerification?: boolean; skipTlsVerification?: boolean;
removeBase64Images?: boolean; removeBase64Images?: boolean;
blockAds?: boolean;
} }
export type Action = { export type Action = {