mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-04-18 11:50:00 +08:00
fix: curl failure should return 4xx if specified explicitly
This commit is contained in:
parent
bc8dea9139
commit
a30a865140
@@ -739,11 +739,20 @@ export class CrawlerHost extends RPCHost {
|
||||
// deprecated name
|
||||
crawlOpts?.engine === 'direct'
|
||||
) {
|
||||
const sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
|
||||
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
|
||||
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
|
||||
if (!sideLoaded.file) {
|
||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||
let sideLoaded;
|
||||
try {
|
||||
sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
|
||||
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
|
||||
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
|
||||
|
||||
} catch (err) {
|
||||
if (err instanceof ServiceBadAttemptError) {
|
||||
throw new AssertionFailureError(err.message);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
if (!sideLoaded?.file) {
|
||||
throw new AssertionFailureError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||
}
|
||||
const draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
|
||||
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
|
||||
|
Loading…
x
Reference in New Issue
Block a user