mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-18 02:35:55 +08:00
fix: clean broken markdown
This commit is contained in:
parent
7fc30dd003
commit
9b190127aa
@ -8,6 +8,7 @@ import { Request, Response } from 'express';
|
|||||||
import normalizeUrl from "@esm2cjs/normalize-url";
|
import normalizeUrl from "@esm2cjs/normalize-url";
|
||||||
|
|
||||||
function tidyMarkdown(markdown: string): string {
|
function tidyMarkdown(markdown: string): string {
|
||||||
|
|
||||||
// Step 1: Handle complex broken links with text and optional images spread across multiple lines
|
// Step 1: Handle complex broken links with text and optional images spread across multiple lines
|
||||||
let normalizedMarkdown = markdown.replace(/\[\s*([^]+?)\s*\]\s*\(\s*([^)]+)\s*\)/g, (match, text, url) => {
|
let normalizedMarkdown = markdown.replace(/\[\s*([^]+?)\s*\]\s*\(\s*([^)]+)\s*\)/g, (match, text, url) => {
|
||||||
// Remove internal new lines and excessive spaces within the text
|
// Remove internal new lines and excessive spaces within the text
|
||||||
@ -39,7 +40,10 @@ function tidyMarkdown(markdown: string): string {
|
|||||||
// Step 3: Replace more than two consecutive empty lines with exactly two empty lines
|
// Step 3: Replace more than two consecutive empty lines with exactly two empty lines
|
||||||
normalizedMarkdown = normalizedMarkdown.replace(/\n{3,}/g, '\n\n');
|
normalizedMarkdown = normalizedMarkdown.replace(/\n{3,}/g, '\n\n');
|
||||||
|
|
||||||
return normalizedMarkdown;
|
// Step 4: Remove leading spaces from each line
|
||||||
|
normalizedMarkdown = normalizedMarkdown.replace(/^[ \t]+/gm, '');
|
||||||
|
|
||||||
|
return normalizedMarkdown.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user