diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 936eafa..d3ec7aa 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -590,33 +590,26 @@ export class CrawlerHost extends RPCHost { } async *cachedScrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, crawlerOpts?: CrawlerOptions) { + let overrideFinalSnapshot; if (crawlerOpts?.html) { - const fakeSnapshot = { + overrideFinalSnapshot = { href: urlToCrawl.toString(), html: crawlerOpts.html, title: '', text: '', } as PageSnapshot; - - yield this.jsdomControl.narrowSnapshot(fakeSnapshot, crawlOpts); - - return; } if (crawlerOpts?.pdf) { const pdfBuf = crawlerOpts.pdf instanceof Blob ? await crawlerOpts.pdf.arrayBuffer().then((x) => Buffer.from(x)) : Buffer.from(crawlerOpts.pdf, 'base64'); const pdfDataUrl = `data:application/pdf;base64,${pdfBuf.toString('base64')}`; - const fakeSnapshot = { + overrideFinalSnapshot = { href: urlToCrawl.toString(), html: `