fix: curl failure should return 4xx if the curl engine is specified explicitly

This commit is contained in:
yanlong.wang 2025-03-24 16:33:43 +08:00
parent bc8dea9139
commit a30a865140
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@ -739,11 +739,20 @@ export class CrawlerHost extends RPCHost {
// deprecated name
crawlOpts?.engine === 'direct'
) {
const sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
if (!sideLoaded.file) {
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
let sideLoaded;
try {
sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
} catch (err) {
if (err instanceof ServiceBadAttemptError) {
throw new AssertionFailureError(err.message);
}
throw err;
}
if (!sideLoaded?.file) {
throw new AssertionFailureError(`Remote server did not return a body: ${urlToCrawl}`);
}
const draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);