diff --git a/apps/api/src/__tests__/snips/scrape.test.ts b/apps/api/src/__tests__/snips/scrape.test.ts index 7970d820..754ef8e0 100644 --- a/apps/api/src/__tests__/snips/scrape.test.ts +++ b/apps/api/src/__tests__/snips/scrape.test.ts @@ -110,5 +110,27 @@ describe("Scrape tests", () => { expectScrapeToSucceed(response); expect(typeof response.body.data.screenshot).toBe("string"); }, 15000); - }) + }); + + describe("Proxy API (f-e dependant)", () => { + it.concurrent("undefined works", async () => { + await scrape({ + url: "http://firecrawl.dev", + }); + }, 15000); + + it.concurrent("basic works", async () => { + await scrape({ + url: "http://firecrawl.dev", + proxy: "basic", + }); + }, 15000); + + it.concurrent("stealth works", async () => { + await scrape({ + url: "http://firecrawl.dev", + proxy: "stealth", + }); + }, 15000); + }); }); diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 858c792f..b8bf4363 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -217,6 +217,7 @@ const baseScrapeOptions = z fastMode: z.boolean().default(false), useMock: z.string().optional(), blockAds: z.boolean().default(true), + proxy: z.enum(["basic", "stealth"]).optional(), }) .strict(strictMessage); diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts index 26f7847f..485b8eeb 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts @@ -207,6 +207,7 @@ export async function scrapeURLWithFireEngineChromeCDP( timeout, // TODO: better timeout logic disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache, blockAds: meta.options.blockAds, + mobileProxy: meta.options.proxy === undefined ? undefined : meta.options.proxy === "stealth" ? true : false, // TODO: scrollXPaths }; @@ -284,6 +285,7 @@ export async function scrapeURLWithFireEnginePlaywright( wait: meta.options.waitFor, geolocation: meta.options.geolocation ?? meta.options.location, blockAds: meta.options.blockAds, + mobileProxy: meta.options.proxy === undefined ? undefined : meta.options.proxy === "stealth" ? true : false, timeout, }; @@ -338,6 +340,7 @@ export async function scrapeURLWithFireEngineTLSClient( atsv: meta.internalOptions.atsv, geolocation: meta.options.geolocation ?? meta.options.location, disableJsDom: meta.internalOptions.v0DisableJsDom, + mobileProxy: meta.options.proxy === undefined ? undefined : meta.options.proxy === "stealth" ? true : false, timeout, }; diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts index f3eedde4..607c0c8d 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts @@ -27,6 +27,8 @@ export type FireEngineScrapeRequestCommon = { instantReturn?: boolean; // default: false geolocation?: { country?: string; languages?: string[] }; + mobileProxy?: boolean; // leave it undefined if user doesn't specify + timeout?: number; }; diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index 896e177b..f32708c0 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -67,6 +67,7 @@ export const featureFlags = [ "mobile", "skipTlsVerification", "useFastMode", + "stealthProxy", ] as const; export type FeatureFlag = (typeof featureFlags)[number]; @@ -87,6 +88,7 @@ export const featureFlagOptions: { location: { priority: 10 }, mobile: { priority: 10 }, skipTlsVerification: { priority: 10 }, + stealthProxy: { priority: 20 }, } as const; export type EngineScrapeResult = { @@ -145,6 +147,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: false, + stealthProxy: false, }, quality: 1000, // cache should always be tried first }, @@ -161,6 +164,7 @@ export const engineOptions: { mobile: true, skipTlsVerification: true, useFastMode: false, + stealthProxy: true, }, quality: 50, }, @@ -177,6 +181,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: false, + stealthProxy: true, }, quality: 40, }, @@ -193,6 +198,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: false, + stealthProxy: false, }, quality: 30, }, @@ -209,6 +215,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: false, + stealthProxy: false, }, quality: 29, }, @@ -225,6 +232,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: false, + stealthProxy: false, }, quality: 20, }, @@ -241,6 +249,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: true, + stealthProxy: true, }, quality: 10, }, @@ -257,6 +266,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: true, + stealthProxy: false, }, quality: 5, }, @@ -273,6 +283,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: true, + stealthProxy: true, // kinda... }, quality: -10, }, @@ -289,6 +300,7 @@ export const engineOptions: { mobile: false, skipTlsVerification: false, useFastMode: true, + stealthProxy: true, // kinda... }, quality: -10, }, diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts index f95d199f..416d5731 100644 --- a/apps/api/src/scraper/scrapeURL/index.ts +++ b/apps/api/src/scraper/scrapeURL/index.ts @@ -94,6 +94,10 @@ function buildFeatureFlags( flags.add("useFastMode"); } + if (options.proxy === "stealth") { + flags.add("stealthProxy"); + } + const urlO = new URL(url); if (urlO.pathname.endsWith(".pdf")) { diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 8e74d2d8..ad22c2a3 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -94,6 +94,7 @@ export interface CrawlScrapeOptions { skipTlsVerification?: boolean; removeBase64Images?: boolean; blockAds?: boolean; + proxy?: "basic" | "stealth"; } export type Action = {