mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 07:39:00 +08:00
feat(scrapeUrl/fire-engine): add blockAds flag (FIR-692) (#1106)
* feat(scrapeUrl/fire-engine): add blockAds flag
* feat(v1/scrape): blockAds test
This commit is contained in:
parent
5733b82e9d
commit
d09e0603f8
@ -36,7 +36,28 @@ describe("Scrape tests", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("Location API", () => {
|
describe("Ad blocking (f-e dependant)", () => {
|
||||||
|
it.concurrent("blocks ads by default", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://canyoublockit.com/testing/",
|
||||||
|
});
|
||||||
|
|
||||||
|
expectScrapeToSucceed(response);
|
||||||
|
expect(response.body.data.markdown).not.toContain(".g.doubleclick.net/");
|
||||||
|
}, 10000);
|
||||||
|
|
||||||
|
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://canyoublockit.com/testing/",
|
||||||
|
blockAds: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
expectScrapeToSucceed(response);
|
||||||
|
expect(response.body.data.markdown).toContain(".g.doubleclick.net/");
|
||||||
|
}, 10000);
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("Location API (f-e dependant)", () => {
|
||||||
it.concurrent("works without specifying an explicit location", async () => {
|
it.concurrent("works without specifying an explicit location", async () => {
|
||||||
const response = await scrape({
|
const response = await scrape({
|
||||||
url: "https://iplocation.com",
|
url: "https://iplocation.com",
|
||||||
@ -54,5 +75,5 @@ describe("Scrape tests", () => {
|
|||||||
expectScrapeToSucceed(response);
|
expectScrapeToSucceed(response);
|
||||||
expect(response.body.data.markdown).toContain("| Country | United States |");
|
expect(response.body.data.markdown).toContain("| Country | United States |");
|
||||||
});
|
});
|
||||||
})
|
});
|
||||||
});
|
});
|
||||||
|
@ -186,6 +186,7 @@ export const scrapeOptions = z
|
|||||||
removeBase64Images: z.boolean().default(true),
|
removeBase64Images: z.boolean().default(true),
|
||||||
fastMode: z.boolean().default(false),
|
fastMode: z.boolean().default(false),
|
||||||
useMock: z.string().optional(),
|
useMock: z.string().optional(),
|
||||||
|
blockAds: z.boolean().default(true),
|
||||||
})
|
})
|
||||||
.strict(strictMessage);
|
.strict(strictMessage);
|
||||||
|
|
||||||
|
@ -197,6 +197,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
|||||||
mobile: meta.options.mobile,
|
mobile: meta.options.mobile,
|
||||||
timeout, // TODO: better timeout logic
|
timeout, // TODO: better timeout logic
|
||||||
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
||||||
|
blockAds: meta.options.blockAds,
|
||||||
// TODO: scrollXPaths
|
// TODO: scrollXPaths
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -271,6 +272,7 @@ export async function scrapeURLWithFireEnginePlaywright(
|
|||||||
fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"),
|
fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"),
|
||||||
wait: meta.options.waitFor,
|
wait: meta.options.waitFor,
|
||||||
geolocation: meta.options.geolocation ?? meta.options.location,
|
geolocation: meta.options.geolocation ?? meta.options.location,
|
||||||
|
blockAds: meta.options.blockAds,
|
||||||
|
|
||||||
timeout,
|
timeout,
|
||||||
};
|
};
|
||||||
|
@ -37,6 +37,7 @@ export type FireEngineScrapeRequestChromeCDP = {
|
|||||||
blockMedia?: true; // cannot be false
|
blockMedia?: true; // cannot be false
|
||||||
mobile?: boolean;
|
mobile?: boolean;
|
||||||
disableSmartWaitCache?: boolean;
|
disableSmartWaitCache?: boolean;
|
||||||
|
blockAds?: boolean; // default: true
|
||||||
};
|
};
|
||||||
|
|
||||||
export type FireEngineScrapeRequestPlaywright = {
|
export type FireEngineScrapeRequestPlaywright = {
|
||||||
|
@ -93,6 +93,7 @@ export interface CrawlScrapeOptions {
|
|||||||
mobile?: boolean;
|
mobile?: boolean;
|
||||||
skipTlsVerification?: boolean;
|
skipTlsVerification?: boolean;
|
||||||
removeBase64Images?: boolean;
|
removeBase64Images?: boolean;
|
||||||
|
blockAds?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type Action = {
|
export type Action = {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user