diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index c331d95..e3db55c 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -1,4 +1,4 @@ -import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection } from 'civkit'; +import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit'; import { singleton } from 'tsyringe'; import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared'; import _ from 'lodash'; @@ -90,10 +90,6 @@ ${this.content} try { for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) { - if (!scrapped) { - continue; - } - const formatted = this.formatSnapshot(scrapped); if (scrapped.screenshot && screenshotEnabled) { @@ -134,6 +130,10 @@ ${this.content} return formatted; } + if (!lastScrapped) { + throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`); + } + return this.formatSnapshot(lastScrapped); } @@ -148,6 +148,10 @@ ${this.content} return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null }); } + if (!lastScrapped) { + throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`); + } + return `${this.formatSnapshot(lastScrapped)}`; } diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 0d3dcb2..327198f 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -153,7 +153,7 @@ function giveSnapshot() { return page; } - async *scrap(url: string, noCache: string | boolean = false) { + async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator { const parsedUrl = new URL(url); // parsedUrl.search = ''; parsedUrl.hash = '';