mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-04-23 06:10:07 +08:00
fix: curl failure should return 4xx if specified explicitly
This commit is contained in:
parent
bc8dea9139
commit
a30a865140
@ -739,11 +739,20 @@ export class CrawlerHost extends RPCHost {
|
|||||||
// deprecated name
|
// deprecated name
|
||||||
crawlOpts?.engine === 'direct'
|
crawlOpts?.engine === 'direct'
|
||||||
) {
|
) {
|
||||||
const sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
|
let sideLoaded;
|
||||||
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
|
try {
|
||||||
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
|
sideLoaded = (crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) ?
|
||||||
if (!sideLoaded.file) {
|
await this.sideLoadWithAllocatedProxy(urlToCrawl, crawlOpts) :
|
||||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
await this.curlControl.sideLoad(urlToCrawl, crawlOpts);
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof ServiceBadAttemptError) {
|
||||||
|
throw new AssertionFailureError(err.message);
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
if (!sideLoaded?.file) {
|
||||||
|
throw new AssertionFailureError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||||
}
|
}
|
||||||
const draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
|
const draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
|
||||||
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
|
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user