mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 11:45:56 +08:00
fix: catch turndown errors
This commit is contained in:
parent
6ee0f2de75
commit
cbc13ecbbd
@ -51,7 +51,7 @@ function tidyMarkdown(markdown: string): string {
|
||||
export class CrawlerHost extends RPCHost {
|
||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||
|
||||
turnDownPlugins = [require('turndown-plugin-gfm').gfm];
|
||||
turnDownPlugins = [require('turndown-plugin-gfm').tables];
|
||||
|
||||
constructor(
|
||||
protected globalLogger: Logger,
|
||||
@ -107,11 +107,31 @@ export class CrawlerHost extends RPCHost {
|
||||
}
|
||||
});
|
||||
|
||||
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
||||
try {
|
||||
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
||||
} catch (err) {
|
||||
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
||||
const vanillaTurnDownService = new TurndownService();
|
||||
try {
|
||||
contentText = vanillaTurnDownService.turndown(toBeTurnedToMd).trim();
|
||||
} catch (err2) {
|
||||
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
||||
contentText = turnDownService.turndown(snapshot.html);
|
||||
try {
|
||||
contentText = turnDownService.turndown(snapshot.html);
|
||||
} catch (err) {
|
||||
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
||||
const vanillaTurnDownService = new TurndownService();
|
||||
try {
|
||||
contentText = vanillaTurnDownService.turndown(snapshot.html);
|
||||
} catch (err2) {
|
||||
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
||||
contentText = snapshot.text;
|
||||
|
Loading…
x
Reference in New Issue
Block a user