fix: improved code rules

This commit is contained in:
yanlong.wang 2024-06-13 16:27:30 +08:00
parent fd9a86bc00
commit ee008ebe10
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 29 additions and 2 deletions

View File

@ -132,7 +132,10 @@ export class CrawlerHost extends RPCHost {
}
getTurndown(noRules?: boolean | string) {
const turnDownService = new TurndownService();
const turnDownService = new TurndownService({
codeBlockStyle: 'fenced',
preformattedCode: true,
} as any);
if (!noRules) {
turnDownService.addRule('remove-irrelevant', {
filter: ['meta', 'style', 'script', 'noscript', 'link', 'textarea'],
@ -179,6 +182,30 @@ export class CrawlerHost extends RPCHost {
return `[${fixedContent}](${fixedHref}${title || ''})`;
}
});
// Rule 'improved-code': renders <code> elements as Markdown code.
// Single-line content becomes an inline code span; content containing a
// newline becomes a fenced block.
turnDownService.addRule('improved-code', {
    /**
     * Matches every CODE element except one that is the only child of a PRE
     * (no previous or next sibling). That case is excluded here; presumably
     * it is handled by the code-block rule — verify against the Turndown config.
     */
    filter: function (node: any) {
        if (node.nodeName !== 'CODE') {
            return false;
        }
        const hasSiblings = node.previousSibling || node.nextSibling;
        // Guard against a detached node so the filter never throws on a missing parent.
        const isCodeBlock = !!node.parentNode && node.parentNode.nodeName === 'PRE' && !hasSiblings;
        return !isCodeBlock;
    },
    /**
     * Wraps the already-converted text content in backtick delimiters.
     *
     * @param inputContent text content of the matched element
     * @returns '' for empty content, otherwise the delimited Markdown string
     */
    replacement: function (inputContent: any) {
        if (!inputContent) return '';
        const content: string = inputContent;
        const backtickRuns: string[] = content.match(/`+/gm) || [];

        if (content.includes('\n')) {
            // Fenced block: the fence must be longer than every backtick run in
            // the content, otherwise such a run could terminate the block early.
            const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
            const fence = '`'.repeat(Math.max(3, longestRun + 1));
            const trailingNewline = content.endsWith('\n') ? '' : '\n';
            return fence + '\n' + content + trailingNewline + fence;
        }

        // Inline span: pick the shortest backtick run that does not occur in the content.
        let delimiter = '`';
        while (backtickRuns.indexOf(delimiter) !== -1) delimiter = delimiter + '`';

        // Pad with one space on BOTH sides when the content starts/ends with a
        // backtick or is itself space-padded: a one-sided pad would let a trailing
        // backtick merge into the closing delimiter, and Markdown only strips the
        // padding when it is present on both ends.
        const padding = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
        return delimiter + padding + content + padding + delimiter;
    }
});
return turnDownService;
}

@ -1 +1 @@
Subproject commit a6116b73e99e3d335b0cd4cfcae8f4f0c7e72f6d
Subproject commit 5939c7091985706bebe7d1d83591430426b292c8