From 19a0bbe924123f81f5d71f2c94de560a8fe4c652 Mon Sep 17 00:00:00 2001 From: Yanlong Wang Date: Mon, 10 Mar 2025 09:48:22 +0800 Subject: [PATCH] fix: bad snapshot in sideload should not throw directly --- src/api/crawler.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/api/crawler.ts b/src/api/crawler.ts index d214ce1..143f802 100644 --- a/src/api/crawler.ts +++ b/src/api/crawler.ts @@ -782,7 +782,14 @@ export class CrawlerHost extends RPCHost { if (!sideLoaded.file) { throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`); } - let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName); + let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile( + urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName + ).catch((err) => { + if (err instanceof ApplicationError) { + return Promise.reject(new ServiceBadAttemptError(err.message)); + } + return Promise.reject(err); + }); if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) { yield draftSnapshot; return; @@ -798,7 +805,14 @@ export class CrawlerHost extends RPCHost { if (!proxyLoaded.file) { throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`); } - const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName); + const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile( + urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName + ).catch((err) => { + if (err instanceof ApplicationError) { + return Promise.reject(new ServiceBadAttemptError(err.message)); + } + return Promise.reject(err); + }); analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html); if (proxyLoaded.status === 200 || analyzed.tokens >= 200) { draftSnapshot = proxySnapshot;