diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 3cd59b6c..089d373c 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -129,3 +129,11 @@ export interface FireEngineResponse { pageError?: string; } + +export interface FireEngineOptions{ + mobileProxy?: boolean; + method?: string; + engine?: string; + blockMedia?: boolean; + blockAds?: boolean; +} diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index acaa432e..80705dbd 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -8,7 +8,6 @@ import { scrapSingleUrl } from "./single_url"; import robotsParser from "robots-parser"; import { getURLDepth } from "./utils/maxDepthUtils"; import { axiosTimeout } from "../../../src/lib/timeout"; -import { scrapWithFireEngine } from "./scrapers/fireEngine"; export class WebCrawler { private initialUrl: string; diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index 2e971139..cb7783a6 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -1,5 +1,5 @@ import axios from "axios"; -import { FireEngineResponse } from "../../../lib/entities"; +import { FireEngineOptions, FireEngineResponse } from "../../../lib/entities"; import { logScrape } from "../../../services/logging/scrape_log"; import { generateRequestParams } from "../single_url"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; @@ -20,6 +20,7 @@ export async function scrapWithFireEngine({ waitFor = 0, screenshot = false, pageOptions = { parsePDF: true }, + fireEngineOptions = {}, headers, options, }: { @@ -27,6 +28,7 @@ export async function scrapWithFireEngine({ waitFor?: number; screenshot?: boolean; pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean }; + fireEngineOptions?: FireEngineOptions; 
headers?: Record<string, string>; options?: any; }): Promise<FireEngineResponse> { @@ -57,6 +59,7 @@ export async function scrapWithFireEngine({ screenshot: screenshotParam, headers: headers, pageOptions: pageOptions, + ...fireEngineOptions, }, { headers: { diff --git a/apps/api/src/scraper/WebScraper/sitemap.ts b/apps/api/src/scraper/WebScraper/sitemap.ts index 6d1d28be..1184ef27 100644 --- a/apps/api/src/scraper/WebScraper/sitemap.ts +++ b/apps/api/src/scraper/WebScraper/sitemap.ts @@ -21,7 +21,7 @@ export async function getLinksFromSitemap( const response = await axios.get(sitemapUrl, { timeout: axiosTimeout }); content = response.data; } else if (mode === 'fire-engine') { - const response = await scrapWithFireEngine({ url: sitemapUrl }); + const response = await scrapWithFireEngine({ url: sitemapUrl, fireEngineOptions: { engine: "request", method: "get", mobileProxy: true } }); content = response.html; } } catch (error) {