mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-19 22:09:10 +08:00
fix: bad snapshot in sideload should not throw directly
This commit is contained in:
parent
ead906e603
commit
19a0bbe924
@ -782,7 +782,14 @@ export class CrawlerHost extends RPCHost {
|
|||||||
if (!sideLoaded.file) {
|
if (!sideLoaded.file) {
|
||||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||||
}
|
}
|
||||||
let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName);
|
let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(
|
||||||
|
urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName
|
||||||
|
).catch((err) => {
|
||||||
|
if (err instanceof ApplicationError) {
|
||||||
|
return Promise.reject(new ServiceBadAttemptError(err.message));
|
||||||
|
}
|
||||||
|
return Promise.reject(err);
|
||||||
|
});
|
||||||
if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) {
|
if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) {
|
||||||
yield draftSnapshot;
|
yield draftSnapshot;
|
||||||
return;
|
return;
|
||||||
@ -798,7 +805,14 @@ export class CrawlerHost extends RPCHost {
|
|||||||
if (!proxyLoaded.file) {
|
if (!proxyLoaded.file) {
|
||||||
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
|
||||||
}
|
}
|
||||||
const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName);
|
const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(
|
||||||
|
urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName
|
||||||
|
).catch((err) => {
|
||||||
|
if (err instanceof ApplicationError) {
|
||||||
|
return Promise.reject(new ServiceBadAttemptError(err.message));
|
||||||
|
}
|
||||||
|
return Promise.reject(err);
|
||||||
|
});
|
||||||
analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html);
|
analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html);
|
||||||
if (proxyLoaded.status === 200 || analyzed.tokens >= 200) {
|
if (proxyLoaded.status === 200 || analyzed.tokens >= 200) {
|
||||||
draftSnapshot = proxySnapshot;
|
draftSnapshot = proxySnapshot;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user