diff --git a/apps/api/src/lib/extract/document-scraper.ts b/apps/api/src/lib/extract/document-scraper.ts index 04194b0b..91d515df 100644 --- a/apps/api/src/lib/extract/document-scraper.ts +++ b/apps/api/src/lib/extract/document-scraper.ts @@ -14,10 +14,13 @@ interface ScrapeDocumentOptions { timeout: number; } -export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces: URLTrace[]): Promise { +export async function scrapeDocument( + options: ScrapeDocumentOptions, + urlTraces: URLTrace[], +): Promise { const trace = urlTraces.find((t) => t.url === options.url); if (trace) { - trace.status = 'scraped'; + trace.status = "scraped"; trace.timing.scrapedAt = new Date().toISOString(); } @@ -35,7 +38,9 @@ export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces: mode: "single_urls", team_id: options.teamId, scrapeOptions: scrapeOptions.parse({}), - internalOptions: {}, + internalOptions: { + useCache: true, + }, plan: options.plan, origin: options.origin, is_scrape: true, @@ -61,9 +66,9 @@ export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces: } catch (error) { logger.error(`Error in scrapeDocument: ${error}`); if (trace) { - trace.status = 'error'; + trace.status = "error"; trace.error = error.message; } return null; } -} \ No newline at end of file +} diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index bf51ac94..956fc3ab 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -298,6 +298,12 @@ export function buildFallbackList(meta: Meta): { engine: Engine; unsupportedFeatures: Set; }[] { + + if (meta.internalOptions.useCache !== true) { + engines.splice(engines.indexOf("cache"), 1); + }else{ + meta.logger.debug("Cache engine enabled by useCache option"); + } const prioritySum = [...meta.featureFlags].reduce( (a, x) => a + featureFlagOptions[x].priority, 0, diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts index 549ce9d1..b13f7d9a 100644 --- a/apps/api/src/scraper/scrapeURL/index.ts +++ b/apps/api/src/scraper/scrapeURL/index.ts @@ -151,7 +151,7 @@ export type InternalOptions = { v0CrawlOnlyUrls?: boolean; v0DisableJsDom?: boolean; - + useCache?: boolean; disableSmartWaitCache?: boolean; // Passed along to fire-engine isBackgroundIndex?: boolean; };