mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 07:35:57 +08:00
fix: improved code rules
This commit is contained in:
parent
fd9a86bc00
commit
ee008ebe10
@ -132,7 +132,10 @@ export class CrawlerHost extends RPCHost {
|
||||
}
|
||||
|
||||
getTurndown(noRules?: boolean | string) {
|
||||
const turnDownService = new TurndownService();
|
||||
const turnDownService = new TurndownService({
|
||||
codeBlockStyle: 'fenced',
|
||||
preformattedCode: true,
|
||||
} as any);
|
||||
if (!noRules) {
|
||||
turnDownService.addRule('remove-irrelevant', {
|
||||
filter: ['meta', 'style', 'script', 'noscript', 'link', 'textarea'],
|
||||
@ -179,6 +182,30 @@ export class CrawlerHost extends RPCHost {
|
||||
return `[${fixedContent}](${fixedHref}${title || ''})`;
|
||||
}
|
||||
});
|
||||
// Registers the 'improved-code' rule: converts <code> elements to Markdown
// code spans, or to a fenced block when their text spans several lines.
turnDownService.addRule('improved-code', {
    /**
     * Selects every <code> element except one that is the only child of a
     * <pre>; that case is excluded here so another rule can handle it.
     */
    filter: function (node: any) {
        const hasSiblings = node.previousSibling || node.nextSibling;
        const parent = node.parentNode;
        const isCodeBlock = Boolean(parent) && parent.nodeName === 'PRE' && !hasSiblings;

        return node.nodeName === 'CODE' && !isCodeBlock;
    },

    /**
     * Wraps the element's text in backtick delimiters.
     *
     * @param inputContent Text content of the matched <code> element.
     * @returns '' for empty content; a fenced block when the content contains
     *          a newline; otherwise an inline code span.
     */
    replacement: function (inputContent: string) {
        if (!inputContent) return '';

        const content = inputContent;
        // Every maximal run of backticks that occurs inside the content.
        const backtickRuns: string[] = content.match(/`+/g) || [];

        if (content.includes('\n')) {
            // Multi-line: the fence must be longer than any backtick run in
            // the content, otherwise such a run could close the fence early.
            const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
            const fence = '`'.repeat(Math.max(3, longestRun + 1));
            const trailingNewline = content.endsWith('\n') ? '' : '\n';

            return fence + '\n' + content + trailingNewline + fence;
        }

        // Inline: use the shortest backtick string that does not appear as a
        // run inside the content, so the span cannot be terminated early.
        let delimiter = '`';
        while (backtickRuns.includes(delimiter)) delimiter += '`';

        // Pad BOTH sides with a space when the content starts/ends with a
        // backtick (it would merge with the delimiter) or is wrapped in
        // significant spaces. The padding must be symmetric: Markdown only
        // strips it again when it is present on both sides.
        const padding = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';

        return delimiter + padding + content + padding + delimiter;
    }
});
|
||||
|
||||
return turnDownService;
|
||||
}
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit a6116b73e99e3d335b0cd4cfcae8f4f0c7e72f6d
|
||||
Subproject commit 5939c7091985706bebe7d1d83591430426b292c8
|
Loading…
x
Reference in New Issue
Block a user