fix: tidyMarkdown feature in turndown rules

This commit is contained in:
yanlong.wang 2024-05-09 15:15:15 +08:00
parent 36bf5d96b5
commit e417cd8a53
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@ -75,6 +75,38 @@ export class CrawlerHost extends RPCHost {
replacement: (innerText) => `${innerText}\n===============\n`
});
}
// Paragraph rule: emits the trimmed paragraph text followed by a blank line.
// Paragraphs whose text is empty after trimming produce no output at all.
turnDownService.addRule('improved-paragraph', {
    filter: 'p',
    replacement(innerText) {
        const body = innerText.trim();
        if (body.length === 0) {
            return '';
        }
        // Collapse any run of three or more newlines down to a single blank line.
        const compacted = body.replace(/\n{3,}/g, '\n\n');
        return `${compacted}\n\n`;
    }
});
// Inline-link rule: renders <a> elements that carry an href as a single-line
// `[text](href "title")` link when the link style option is 'inlined'.
turnDownService.addRule('improved-inline-link', {
    filter(node, options) {
        if (options.linkStyle !== 'inlined') {
            return false;
        }
        if (node.nodeName !== 'A') {
            return false;
        }
        // The href value doubles as the match result: a missing or empty href is falsy.
        return node.getAttribute('href');
    },
    replacement(content, node) {
        const rawHref = node.getAttribute('href');
        // Backslash-escape parentheses so they cannot end the link destination early.
        const escapedHref = rawHref ? rawHref.replace(/([()])/g, '\\$1') : rawHref;

        // cleanAttribute is defined outside this block; presumably it normalizes
        // the raw attribute value -- confirm against its definition.
        const cleanedTitle = cleanAttribute(node.getAttribute('title'));
        const titleSuffix = cleanedTitle
            ? ` "${cleanedTitle.replace(/"/g, '\\"')}"`
            : '';

        // Link text: squeeze every whitespace run (including newlines) into one space.
        const linkText = content.replace(/\s+/g, ' ').trim();
        // Link target: strip all whitespace from the escaped href.
        const linkTarget = escapedHref.replace(/\s+/g, '').trim();

        return `[${linkText}](${linkTarget}${titleSuffix})`;
    }
});
return turnDownService;
}