fix: bad snapshot in sideload should not throw directly

This commit is contained in:
Yanlong Wang 2025-03-10 09:48:22 +08:00
parent ead906e603
commit 19a0bbe924
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@@ -782,7 +782,14 @@ export class CrawlerHost extends RPCHost {
if (!sideLoaded.file) { if (!sideLoaded.file) {
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`); throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
} }
let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName); let draftSnapshot = await this.snapshotFormatter.createSnapshotFromFile(
urlToCrawl, sideLoaded.file, sideLoaded.contentType, sideLoaded.fileName
).catch((err) => {
if (err instanceof ApplicationError) {
return Promise.reject(new ServiceBadAttemptError(err.message));
}
return Promise.reject(err);
});
if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) { if (sideLoaded.status == 200 && !sideLoaded.contentType.startsWith('text/html')) {
yield draftSnapshot; yield draftSnapshot;
return; return;
@@ -798,7 +805,14 @@ export class CrawlerHost extends RPCHost {
if (!proxyLoaded.file) { if (!proxyLoaded.file) {
throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`); throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
} }
const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName); const proxySnapshot = await this.snapshotFormatter.createSnapshotFromFile(
urlToCrawl, proxyLoaded.file, proxyLoaded.contentType, proxyLoaded.fileName
).catch((err) => {
if (err instanceof ApplicationError) {
return Promise.reject(new ServiceBadAttemptError(err.message));
}
return Promise.reject(err);
});
analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html); analyzed = await this.jsdomControl.analyzeHTMLTextLite(proxySnapshot.html);
if (proxyLoaded.status === 200 || analyzed.tokens >= 200) { if (proxyLoaded.status === 200 || analyzed.tokens >= 200) {
draftSnapshot = proxySnapshot; draftSnapshot = proxySnapshot;