mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-18 21:45:58 +08:00
fix: potential gfm performance issue
This commit is contained in:
parent
16cabcaf22
commit
deb0b6dc23
@ -45,14 +45,37 @@ export interface FormattedPage {
|
|||||||
export const md5Hasher = new HashManager('md5', 'hex');
|
export const md5Hasher = new HashManager('md5', 'hex');
|
||||||
|
|
||||||
const gfmPlugin = require('turndown-plugin-gfm');
|
const gfmPlugin = require('turndown-plugin-gfm');
|
||||||
|
/**
 * Matches a class name of the form `highlight-text-<lang>` or
 * `highlight-source-<lang>`; capture group 1 is the language identifier
 * (lowercase letters and digits only).
 *
 * Declared once at module level so the pattern is not rebuilt for every node
 * the rule inspects.
 */
const highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin: registers the `highlightedCodeBlock` rule, which converts
 * `<div class="highlight-(text|source)-LANG"><pre>…</pre></div>` wrappers into
 * fenced code blocks annotated with `LANG`.
 *
 * @param turndownService - The service instance the rule is added to.
 */
export function highlightedCodeBlock(turndownService: TurndownService) {
    turndownService.addRule('highlightedCodeBlock', {
        // Only a DIV whose first child is a PRE and whose class carries the
        // highlight marker is handled by this rule.
        filter: (node) => {
            return (
                node.nodeName === 'DIV' &&
                node.firstChild?.nodeName === 'PRE' &&
                highlightRegExp.test(node.className)
            );
        },
        replacement: (_content, node, options) => {
            const className = (node as any).className || '';
            const language = (className.match(highlightRegExp) || [null, ''])[1];
            // The filter guarantees a PRE first child; fall back to an empty
            // string rather than asserting non-null.
            const code: string = node.firstChild?.textContent ?? '';

            // Use the configured fence as-is unless the code itself contains a
            // line starting with an equally long (or longer) run of the fence
            // character, which would otherwise terminate the block early.
            const baseFence: string = options.fence ?? '```';
            const fenceChar = baseFence.charAt(0);
            let fenceSize = baseFence.length;
            for (const line of code.split('\n')) {
                let run = 0;
                while (run < line.length && line[run] === fenceChar) {
                    run++;
                }
                if (run >= fenceSize) {
                    fenceSize = run + 1;
                }
            }
            const fence = fenceSize > baseFence.length ? fenceChar.repeat(fenceSize) : baseFence;

            return (
                '\n\n' + fence + language + '\n' +
                code +
                '\n' + fence + '\n\n'
            );
        }
    });
}
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
export class SnapshotFormatter extends AsyncService {
|
export class SnapshotFormatter extends AsyncService {
|
||||||
|
|
||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
gfmPlugin = gfmPlugin.gfm;
|
gfmPlugin = [gfmPlugin.tables, highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
|
||||||
gfmNoTable = [gfmPlugin.highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
|
gfmNoTable = [highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: Logger,
|
||||||
@ -475,7 +498,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
imgDataUrlToObjectUrl?: boolean;
|
imgDataUrlToObjectUrl?: boolean;
|
||||||
removeImages?: boolean | 'src';
|
removeImages?: boolean | 'src';
|
||||||
customRules?: { [k: string]: Rule; };
|
customRules?: { [k: string]: Rule; };
|
||||||
customKeep?: Filter
|
customKeep?: Filter;
|
||||||
}) {
|
}) {
|
||||||
const turnDownService = new TurndownService({
|
const turnDownService = new TurndownService({
|
||||||
codeBlockStyle: 'fenced',
|
codeBlockStyle: 'fenced',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user