mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-17 07:26:04 +08:00
fix: catch turndown errors
This commit is contained in:
parent
6ee0f2de75
commit
cbc13ecbbd
@ -51,7 +51,7 @@ function tidyMarkdown(markdown: string): string {
|
|||||||
export class CrawlerHost extends RPCHost {
|
export class CrawlerHost extends RPCHost {
|
||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
turnDownPlugins = [require('turndown-plugin-gfm').gfm];
|
turnDownPlugins = [require('turndown-plugin-gfm').tables];
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: Logger,
|
||||||
@ -107,11 +107,31 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
try {
|
||||||
|
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
||||||
|
const vanillaTurnDownService = new TurndownService();
|
||||||
|
try {
|
||||||
|
contentText = vanillaTurnDownService.turndown(toBeTurnedToMd).trim();
|
||||||
|
} catch (err2) {
|
||||||
|
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
||||||
contentText = turnDownService.turndown(snapshot.html);
|
try {
|
||||||
|
contentText = turnDownService.turndown(snapshot.html);
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
||||||
|
const vanillaTurnDownService = new TurndownService();
|
||||||
|
try {
|
||||||
|
contentText = vanillaTurnDownService.turndown(snapshot.html);
|
||||||
|
} catch (err2) {
|
||||||
|
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
||||||
contentText = snapshot.text;
|
contentText = snapshot.text;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user