mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 21:25:56 +08:00
fix(scrapeURL): only allow disabling the adblock on playwright (FIR-2200) (#1616)
* fix(scrapeURL): only allow disabling the adblock on playwright
* feat(api/tests/scrape): re-enable ad blocking tests
This commit is contained in:
parent
7a8be13220
commit
4167ec53eb
@ -77,24 +77,24 @@ describe("Scrape tests", () => {
|
|||||||
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
|
||||||
// describe("Ad blocking (f-e dependant)", () => {
|
describe("Ad blocking (f-e dependant)", () => {
|
||||||
// it.concurrent("blocks ads by default", async () => {
|
it.concurrent("blocks ads by default", async () => {
|
||||||
// const response = await scrape({
|
const response = await scrape({
|
||||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
// });
|
});
|
||||||
|
|
||||||
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||||
// }, 30000);
|
}, 30000);
|
||||||
|
|
||||||
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||||
// const response = await scrape({
|
const response = await scrape({
|
||||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
// blockAds: false,
|
blockAds: false,
|
||||||
// });
|
});
|
||||||
|
|
||||||
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||||
// }, 30000);
|
}, 30000);
|
||||||
// });
|
});
|
||||||
|
|
||||||
describe("Change Tracking format", () => {
|
describe("Change Tracking format", () => {
|
||||||
it.concurrent("works", async () => {
|
it.concurrent("works", async () => {
|
||||||
|
@ -224,7 +224,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
|||||||
mobile: meta.options.mobile,
|
mobile: meta.options.mobile,
|
||||||
timeout, // TODO: better timeout logic
|
timeout, // TODO: better timeout logic
|
||||||
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
|
||||||
blockAds: meta.options.blockAds,
|
|
||||||
mobileProxy: meta.featureFlags.has("stealthProxy"),
|
mobileProxy: meta.featureFlags.has("stealthProxy"),
|
||||||
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
|
saveScrapeResultToGCS: meta.internalOptions.saveScrapeResultToGCS,
|
||||||
// TODO: scrollXPaths
|
// TODO: scrollXPaths
|
||||||
|
@ -12,7 +12,6 @@ export type FireEngineScrapeRequestCommon = {
|
|||||||
headers?: { [K: string]: string };
|
headers?: { [K: string]: string };
|
||||||
|
|
||||||
blockMedia?: boolean; // default: true
|
blockMedia?: boolean; // default: true
|
||||||
blockAds?: boolean; // default: true
|
|
||||||
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side
|
// pageOptions?: any; // unused, .scrollXPaths is considered on FE side
|
||||||
|
|
||||||
// useProxy?: boolean; // unused, default: true
|
// useProxy?: boolean; // unused, default: true
|
||||||
@ -39,7 +38,6 @@ export type FireEngineScrapeRequestChromeCDP = {
|
|||||||
blockMedia?: true; // cannot be false
|
blockMedia?: true; // cannot be false
|
||||||
mobile?: boolean;
|
mobile?: boolean;
|
||||||
disableSmartWaitCache?: boolean;
|
disableSmartWaitCache?: boolean;
|
||||||
blockAds?: boolean; // default: true
|
|
||||||
saveScrapeResultToGCS?: boolean;
|
saveScrapeResultToGCS?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -58,7 +56,6 @@ export type FireEngineScrapeRequestTLSClient = {
|
|||||||
engine: "tlsclient";
|
engine: "tlsclient";
|
||||||
atsv?: boolean; // v0 only, default: false
|
atsv?: boolean; // v0 only, default: false
|
||||||
disableJsDom?: boolean; // v0 only, default: false
|
disableJsDom?: boolean; // v0 only, default: false
|
||||||
// blockAds?: boolean; // default: true
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const schema = z.object({
|
const schema = z.object({
|
||||||
|
@ -69,6 +69,7 @@ export const featureFlags = [
|
|||||||
"skipTlsVerification",
|
"skipTlsVerification",
|
||||||
"useFastMode",
|
"useFastMode",
|
||||||
"stealthProxy",
|
"stealthProxy",
|
||||||
|
"disableAdblock",
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
export type FeatureFlag = (typeof featureFlags)[number];
|
export type FeatureFlag = (typeof featureFlags)[number];
|
||||||
@ -90,6 +91,7 @@ export const featureFlagOptions: {
|
|||||||
mobile: { priority: 10 },
|
mobile: { priority: 10 },
|
||||||
skipTlsVerification: { priority: 10 },
|
skipTlsVerification: { priority: 10 },
|
||||||
stealthProxy: { priority: 20 },
|
stealthProxy: { priority: 20 },
|
||||||
|
disableAdblock: { priority: 10 },
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
export type EngineScrapeResult = {
|
export type EngineScrapeResult = {
|
||||||
@ -160,6 +162,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 1000, // cache should always be tried first
|
quality: 1000, // cache should always be tried first
|
||||||
},
|
},
|
||||||
@ -177,6 +180,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: true,
|
skipTlsVerification: true,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 50,
|
quality: 50,
|
||||||
},
|
},
|
||||||
@ -194,6 +198,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: true,
|
skipTlsVerification: true,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 45,
|
quality: 45,
|
||||||
},
|
},
|
||||||
@ -211,6 +216,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: true,
|
skipTlsVerification: true,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: true,
|
stealthProxy: true,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: -1,
|
quality: -1,
|
||||||
},
|
},
|
||||||
@ -228,6 +234,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: true,
|
skipTlsVerification: true,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: true,
|
stealthProxy: true,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: -5,
|
quality: -5,
|
||||||
},
|
},
|
||||||
@ -245,6 +252,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: true,
|
||||||
},
|
},
|
||||||
quality: 40,
|
quality: 40,
|
||||||
},
|
},
|
||||||
@ -262,6 +270,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: true,
|
stealthProxy: true,
|
||||||
|
disableAdblock: true,
|
||||||
},
|
},
|
||||||
quality: -10,
|
quality: -10,
|
||||||
},
|
},
|
||||||
@ -279,6 +288,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: false,
|
useFastMode: false,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 20,
|
quality: 20,
|
||||||
},
|
},
|
||||||
@ -296,6 +306,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: true,
|
useFastMode: true,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 10,
|
quality: 10,
|
||||||
},
|
},
|
||||||
@ -313,6 +324,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: true,
|
useFastMode: true,
|
||||||
stealthProxy: true,
|
stealthProxy: true,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: -15,
|
quality: -15,
|
||||||
},
|
},
|
||||||
@ -330,6 +342,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: true,
|
useFastMode: true,
|
||||||
stealthProxy: false,
|
stealthProxy: false,
|
||||||
|
disableAdblock: false,
|
||||||
},
|
},
|
||||||
quality: 5,
|
quality: 5,
|
||||||
},
|
},
|
||||||
@ -347,6 +360,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: true,
|
useFastMode: true,
|
||||||
stealthProxy: true, // kinda...
|
stealthProxy: true, // kinda...
|
||||||
|
disableAdblock: true,
|
||||||
},
|
},
|
||||||
quality: -20,
|
quality: -20,
|
||||||
},
|
},
|
||||||
@ -364,6 +378,7 @@ export const engineOptions: {
|
|||||||
skipTlsVerification: false,
|
skipTlsVerification: false,
|
||||||
useFastMode: true,
|
useFastMode: true,
|
||||||
stealthProxy: true, // kinda...
|
stealthProxy: true, // kinda...
|
||||||
|
disableAdblock: true,
|
||||||
},
|
},
|
||||||
quality: -20,
|
quality: -20,
|
||||||
},
|
},
|
||||||
|
@ -118,6 +118,10 @@ function buildFeatureFlags(
|
|||||||
flags.add("docx");
|
flags.add("docx");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (options.blockAds === false) {
|
||||||
|
flags.add("disableAdblock");
|
||||||
|
}
|
||||||
|
|
||||||
return flags;
|
return flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user