mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 04:29:01 +08:00
feat(scrapeUrl/fire-engine): add blockAds flag (FIR-692) (#1106)
* feat(scrapeUrl/fire-engine): add blockAds flag
* feat(v1/scrape): blockAds test
This commit is contained in:
parent
5733b82e9d
commit
d09e0603f8
@ -36,7 +36,28 @@ describe("Scrape tests", () => {
|
||||
);
|
||||
});
|
||||
|
||||
describe("Location API", () => {
|
||||
describe("Ad blocking (f-e dependant)", () => {
|
||||
it.concurrent("blocks ads by default", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
});
|
||||
|
||||
expectScrapeToSucceed(response);
|
||||
expect(response.body.data.markdown).not.toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
|
||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
blockAds: false,
|
||||
});
|
||||
|
||||
expectScrapeToSucceed(response);
|
||||
expect(response.body.data.markdown).toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe("Location API (f-e dependant)", () => {
|
||||
it.concurrent("works without specifying an explicit location", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
@ -54,5 +75,5 @@ describe("Scrape tests", () => {
|
||||
expectScrapeToSucceed(response);
|
||||
expect(response.body.data.markdown).toContain("| Country | United States |");
|
||||
});
|
||||
})
|
||||
});
|
||||
});
|
||||
|
@ -186,6 +186,7 @@ export const scrapeOptions = z
|
||||
removeBase64Images: z.boolean().default(true),
|
||||
fastMode: z.boolean().default(false),
|
||||
useMock: z.string().optional(),
|
||||
blockAds: z.boolean().default(true),
|
||||
})
|
||||
.strict(strictMessage);
|
||||
|
||||
|
@ -197,6 +197,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
||||
mobile: meta.options.mobile,
|
||||
timeout, // TODO: better timeout logic
|
||||
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
||||
blockAds: meta.options.blockAds,
|
||||
// TODO: scrollXPaths
|
||||
};
|
||||
|
||||
@ -271,6 +272,7 @@ export async function scrapeURLWithFireEnginePlaywright(
|
||||
fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"),
|
||||
wait: meta.options.waitFor,
|
||||
geolocation: meta.options.geolocation ?? meta.options.location,
|
||||
blockAds: meta.options.blockAds,
|
||||
|
||||
timeout,
|
||||
};
|
||||
|
@ -37,6 +37,7 @@ export type FireEngineScrapeRequestChromeCDP = {
|
||||
blockMedia?: true; // cannot be false
|
||||
mobile?: boolean;
|
||||
disableSmartWaitCache?: boolean;
|
||||
blockAds?: boolean; // default: true
|
||||
};
|
||||
|
||||
export type FireEngineScrapeRequestPlaywright = {
|
||||
|
@ -93,6 +93,7 @@ export interface CrawlScrapeOptions {
|
||||
mobile?: boolean;
|
||||
skipTlsVerification?: boolean;
|
||||
removeBase64Images?: boolean;
|
||||
blockAds?: boolean;
|
||||
}
|
||||
|
||||
export type Action = {
|
||||
|
Loading…
x
Reference in New Issue
Block a user