diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index b1904da..dbb577a 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -309,6 +309,7 @@ ${this.content} const crawlOpts: ScrappingOptions = { proxyUrl: ctx.req.get('x-proxy-url'), cookies, + favorScreenshot: customMode === 'screenshot' }; if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) { @@ -418,6 +419,7 @@ ${this.content} return { isFresh: !stale, + ...cache, snapshot: { ...r, screenshot: undefined, @@ -471,7 +473,7 @@ ${this.content} cache = await this.queryCache(urlToCrawl); } - if (cache?.isFresh) { + if (cache?.isFresh && (!crawlOpts.favorScreenshot || (crawlOpts.favorScreenshot && cache?.screenshotAvailable))) { yield cache.snapshot; return; diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 4be3a6d..c946954 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -50,6 +50,7 @@ export interface PageSnapshot { export interface ScrappingOptions { proxyUrl?: string; cookies?: CookieParam[]; + favorScreenshot?: boolean; } @@ -289,7 +290,7 @@ document.addEventListener('load', handlePageLoad); yield { ...snapshot, screenshot } as PageSnapshot; break; } - if (snapshot?.title && snapshot?.html !== lastHTML) { + if (options.favorScreenshot && snapshot?.title && snapshot?.html !== lastHTML) { screenshot = await page.screenshot(); lastHTML = snapshot.html; }