From a30a865140a9d33a1b04fb538343f0ec1433b4dd Mon Sep 17 00:00:00 2001 From: "yanlong.wang" Date: Mon, 24 Mar 2025 16:33:43 +0800 Subject: [PATCH] fix: curl failure should return 4xx if specified explicitly --- src/api/crawler.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/api/crawler.ts b/src/api/crawler.ts index 8020ab3..5d9e75e 100644 --- a/src/api/crawler.ts +++ b/src/api/crawler.ts @@ -739,11 +739,20 @@ export class CrawlerHost extends RPCHost { // deprecated name crawlOpts?.engine === 'direct' ) { - const sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ? - await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) : - await this.curlControl.sideLoad(urlToCrawl, crawlOpts); - if (!sideLoaded.file) { - throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`); + let sideLoaded; + try { + sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ? + await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) : + await this.curlControl.sideLoad(urlToCrawl, crawlOpts); + + } catch (err) { + if (err instanceof ServiceBadAttemptError) { + throw new AssertionFailureError(err.message); + } + throw err; + } + if (!sideLoaded?.file) { + throw new AssertionFailureError(`Remote server did not return a body: ${urlToCrawl}`); } const draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName); yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);