mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-17 04:45:57 +08:00
fix: improved code rules
This commit is contained in:
parent
fd9a86bc00
commit
ee008ebe10
@ -132,7 +132,10 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
getTurndown(noRules?: boolean | string) {
|
getTurndown(noRules?: boolean | string) {
|
||||||
const turnDownService = new TurndownService();
|
const turnDownService = new TurndownService({
|
||||||
|
codeBlockStyle: 'fenced',
|
||||||
|
preformattedCode: true,
|
||||||
|
} as any);
|
||||||
if (!noRules) {
|
if (!noRules) {
|
||||||
turnDownService.addRule('remove-irrelevant', {
|
turnDownService.addRule('remove-irrelevant', {
|
||||||
filter: ['meta', 'style', 'script', 'noscript', 'link', 'textarea'],
|
filter: ['meta', 'style', 'script', 'noscript', 'link', 'textarea'],
|
||||||
@ -179,6 +182,30 @@ export class CrawlerHost extends RPCHost {
|
|||||||
return `[${fixedContent}](${fixedHref}${title || ''})`;
|
return `[${fixedContent}](${fixedHref}${title || ''})`;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
// Turndown rule for <code> elements, except a <code> that is the sole child of
// a <pre> (this rule's filter rejects those, so they are handled elsewhere).
// Single-line content becomes an inline code span; content containing a line
// break becomes a fenced block.
turnDownService.addRule('improved-code', {
    /**
     * Selects <code> nodes that are not a "pure" code block, i.e. not the only
     * child of a <pre>.
     */
    filter: function (node: any) {
        if (node.nodeName !== 'CODE') {
            return false;
        }

        const parent = node.parentNode;
        const hasSiblings = Boolean(node.previousSibling || node.nextSibling);
        // Guard against a missing parent so a detached node cannot throw here.
        const isCodeBlock = Boolean(parent) && parent.nodeName === 'PRE' && !hasSiblings;

        return !isCodeBlock;
    },

    /**
     * Wraps the already-converted text content in backtick delimiters.
     *
     * @param inputContent Text content of the matched <code> node.
     * @returns Markdown for the node, or an empty string for empty content.
     */
    replacement: function (inputContent: string) {
        if (!inputContent) return '';

        const content = inputContent;
        const backtickRuns: string[] = content.match(/`+/g) || [];

        if (content.includes('\n')) {
            // Fenced block: the fence must be longer than every backtick run in
            // the content, otherwise a run inside the content could close the
            // fence early. Three backticks is the minimum fence length.
            const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
            const fence = '`'.repeat(Math.max(3, longestRun + 1));
            const trailingNewline = content.endsWith('\n') ? '' : '\n';

            return fence + '\n' + content + trailingNewline + fence;
        }

        // Inline span: use the shortest backtick run that does not occur in the
        // content. The content stays inline even when that run is three or more
        // backticks long.
        let delimiter = '`';
        while (backtickRuns.includes(delimiter)) delimiter += '`';

        // Pad with one space when the content starts or ends with a backtick,
        // or is itself space-padded on both sides, so the delimiters stay
        // unambiguous and the padding survives rendering.
        const padding = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';

        return delimiter + padding + content + padding + delimiter;
    }
});
|
||||||
|
|
||||||
return turnDownService;
|
return turnDownService;
|
||||||
}
|
}
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit a6116b73e99e3d335b0cd4cfcae8f4f0c7e72f6d
|
Subproject commit 5939c7091985706bebe7d1d83591430426b292c8
|
Loading…
x
Reference in New Issue
Block a user