mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 01:45:59 +08:00
fix(scrapeURL): only allow disabling the adblock on playwright (FIR-2200) (#1616)
* fix(scrapeURL): only allow disabling the adblock on playwright
* feat(api/tests/scrape): re-enable ad blocking tests
This commit is contained in:
parent 7a8be13220
commit 4167ec53eb
@ -77,24 +77,24 @@ describe("Scrape tests", () => {
|
||||
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
||||
}, 60000);
|
||||
|
||||
// describe("Ad blocking (f-e dependant)", () => {
|
||||
// it.concurrent("blocks ads by default", async () => {
|
||||
// const response = await scrape({
|
||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
// });
|
||||
describe("Ad blocking (f-e dependant)", () => {
|
||||
it.concurrent("blocks ads by default", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
});
|
||||
|
||||
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
// }, 30000);
|
||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
}, 30000);
|
||||
|
||||
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
// const response = await scrape({
|
||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
// blockAds: false,
|
||||
// });
|
||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
blockAds: false,
|
||||
});
|
||||
|
||||
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||
// }, 30000);
|
||||
// });
|
||||
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||
}, 30000);
|
||||
});
|
||||
|
||||
describe("Change Tracking format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
|
@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
||||
mobile: meta.options.mobile,
|
||||
timeout, // TODO: better timeout logic
|
||||
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
||||
blockAds: meta.options.blockAds,
|
||||
mobileProxy: meta.featureFlags.has("stealthProxy"),
|
||||
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
|
||||
// TODO: scrollXPaths
|
||||
|
@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = {
|
||||
headers?: { [K: string]: string };
|
||||
|
||||
blockMedia?: boolean; // default: true
|
||||
blockAds?: boolean; // default: true
|
||||
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side
|
||||
|
||||
// useProxy?: boolean; // unused, default: true
|
||||
@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = {
|
||||
blockMedia?: true; // cannot be false
|
||||
mobile?: boolean;
|
||||
disableSmartWaitCache?: boolean;
|
||||
blockAds?: boolean; // default: true
|
||||
saveScrapeResultToGCS?: boolean;
|
||||
};
|
||||
|
||||
@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = {
|
||||
engine: "tlsclient";
|
||||
atsv?: boolean; // v0 only, default: false
|
||||
disableJsDom?: boolean; // v0 only, default: false
|
||||
// blockAds?: boolean; // default: true
|
||||
};
|
||||
|
||||
const schema = z.object({
|
||||
|
@ -69,6 +69,7 @@ export const featureFlags = [
|
||||
"skipTlsVerification",
|
||||
"useFastMode",
|
||||
"stealthProxy",
|
||||
"disableAdblock",
|
||||
] as const;
|
||||
|
||||
export type FeatureFlag = (typeof featureFlags)[number];
|
||||
@ -90,6 +91,7 @@ export const featureFlagOptions: {
|
||||
mobile: { priority: 10 },
|
||||
skipTlsVerification: { priority: 10 },
|
||||
stealthProxy: { priority: 20 },
|
||||
disableAdblock: { priority: 10 },
|
||||
} as const;
|
||||
|
||||
export type EngineScrapeResult = {
|
||||
@ -160,6 +162,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: false,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 1000, // cache should always be tried first
|
||||
},
|
||||
@ -177,6 +180,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: true,
|
||||
useFastMode: false,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 50,
|
||||
},
|
||||
@ -194,6 +198,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: true,
|
||||
useFastMode: false,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 45,
|
||||
},
|
||||
@ -211,6 +216,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: true,
|
||||
useFastMode: false,
|
||||
stealthProxy: true,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: -1,
|
||||
},
|
||||
@ -228,6 +234,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: true,
|
||||
useFastMode: false,
|
||||
stealthProxy: true,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: -5,
|
||||
},
|
||||
@ -245,6 +252,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: false,
|
||||
stealthProxy: false,
|
||||
disableAdblock: true,
|
||||
},
|
||||
quality: 40,
|
||||
},
|
||||
@ -262,6 +270,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: false,
|
||||
stealthProxy: true,
|
||||
disableAdblock: true,
|
||||
},
|
||||
quality: -10,
|
||||
},
|
||||
@ -279,6 +288,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: false,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 20,
|
||||
},
|
||||
@ -296,6 +306,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: true,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 10,
|
||||
},
|
||||
@ -313,6 +324,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: true,
|
||||
stealthProxy: true,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: -15,
|
||||
},
|
||||
@ -330,6 +342,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: true,
|
||||
stealthProxy: false,
|
||||
disableAdblock: false,
|
||||
},
|
||||
quality: 5,
|
||||
},
|
||||
@ -347,6 +360,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: true,
|
||||
stealthProxy: true, // kinda...
|
||||
disableAdblock: true,
|
||||
},
|
||||
quality: -20,
|
||||
},
|
||||
@ -364,6 +378,7 @@ export const engineOptions: {
|
||||
skipTlsVerification: false,
|
||||
useFastMode: true,
|
||||
stealthProxy: true, // kinda...
|
||||
disableAdblock: true,
|
||||
},
|
||||
quality: -20,
|
||||
},
|
||||
|
@ -118,6 +118,10 @@ function buildFeatureFlags(
|
||||
flags.add("docx");
|
||||
}
|
||||
|
||||
if (options.blockAds === false) {
|
||||
flags.add("disableAdblock");
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user