From 950338261a8b2626bb304e89a72088b1db771963 Mon Sep 17 00:00:00 2001 From: Yanlong Wang Date: Sat, 13 Apr 2024 08:07:55 +0800 Subject: [PATCH] fix --- backend/functions/src/cloud-functions/crawler.ts | 4 ++++ backend/functions/src/services/puppeteer.ts | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index e3db55c..205caf2 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -90,6 +90,10 @@ ${this.content} try { for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) { + if (!scrapped) { + continue; + } + const formatted = this.formatSnapshot(scrapped); if (scrapped.screenshot && screenshotEnabled) { diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 327198f..462c6c8 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -15,7 +15,7 @@ export interface PageSnapshot { href: string; html: string; text: string; - parsed: { + parsed?: { title: string; content: string; textContent: string; @@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService { timeout: 10_000 }).catch((err) => { this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err }); - process.nextTick(()=> { + process.nextTick(() => { process.exit(1); }); return Promise.reject(err); @@ -153,7 +153,7 @@ function giveSnapshot() { return page; } - async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> { + async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> { const parsedUrl = new URL(url); // parsedUrl.search = ''; parsedUrl.hash = ''; @@ -236,7 +236,7 @@ function giveSnapshot() { while (true) { await Promise.race([nextSnapshotDeferred.promise, gotoPromise]); if (finalized) { - yield { 
...snapshot, screenshot }; + yield { ...snapshot, screenshot } as PageSnapshot; break; } yield snapshot;