mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-18 07:36:04 +08:00
fix: bad snapshot in sideload should not throw directly
This commit is contained in:
parent
ead906e603
commit
19a0bbe924
@ -782,7 +782,14 @@ export class CrawlerHost extends RPCHost {
|
||||
if (!sideLoaded.file) {
|
||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||
}
|
||||
let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
|
||||
let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(
|
||||
urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName
|
||||
).catch((err) => {
|
||||
if (err instanceof ApplicationError) {
|
||||
return Promise.reject(new ServiceBadAttemptError(err.message));
|
||||
}
|
||||
return Promise.reject(err);
|
||||
});
|
||||
if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) {
|
||||
yield draftSnapshot;
|
||||
return;
|
||||
@ -798,7 +805,14 @@ export class CrawlerHost extends RPCHost {
|
||||
if (!proxyLoaded.file) {
|
||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||
}
|
||||
const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName);
|
||||
const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(
|
||||
urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName
|
||||
).catch((err) => {
|
||||
if (err instanceof ApplicationError) {
|
||||
return Promise.reject(new ServiceBadAttemptError(err.message));
|
||||
}
|
||||
return Promise.reject(err);
|
||||
});
|
||||
analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html);
|
||||
if (proxyLoaded.status === 200 || analyzed.tokens >= 200) {
|
||||
draftSnapshot = proxySnapshot;
|
||||
|
Loading…
x
Reference in New Issue
Block a user