mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 18:40:45 +08:00
Merge pull request #629 from mendableai/go-parser-singleton
Feat: parser singleton
This commit is contained in:
commit
554a05068c
@ -8,9 +8,38 @@ import dotenv from 'dotenv';
|
|||||||
import { Logger } from './logger';
|
import { Logger } from './logger';
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
// TODO: create a singleton for the converter
|
|
||||||
// TODO: add a timeout to the Go parser
|
// TODO: add a timeout to the Go parser
|
||||||
|
|
||||||
|
/** Node-style callback handed to the async variant of the Go binding. */
type GoConvertCallback = (err: Error | null, res: string) => void;

/**
 * Shape of the function object bound from the shared library: callable
 * directly, and also exposing an `async` variant that reports its outcome
 * through a callback.
 */
interface GoConvertFunction {
  (html: string): string;
  async(html: string, callback: GoConvertCallback): void;
}

/**
 * Lazily-created singleton around the Go HTML-to-Markdown shared library.
 *
 * The library is loaded and the `ConvertHTMLToMarkdown` symbol is bound once,
 * when the instance is first requested; every later conversion reuses that
 * binding instead of loading the library again.
 */
class GoMarkdownConverter {
  /** The shared instance; stays undefined until `getInstance()` is first called. */
  private static instance?: GoMarkdownConverter;

  /** Bound `ConvertHTMLToMarkdown` function (one string argument, string result). */
  private readonly convert: GoConvertFunction;

  /**
   * Loads `go-html-to-md/html-to-markdown.so` (resolved relative to this
   * module's directory) and binds the conversion function.
   * Private so that `getInstance()` is the only way to obtain a converter.
   */
  private constructor() {
    const goExecutablePath = join(__dirname, 'go-html-to-md/html-to-markdown.so');
    const lib = koffi.load(goExecutablePath);
    this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
  }

  /**
   * Returns the shared converter, constructing it on first use.
   *
   * @returns The process-wide `GoMarkdownConverter` instance.
   */
  public static getInstance(): GoMarkdownConverter {
    if (!GoMarkdownConverter.instance) {
      GoMarkdownConverter.instance = new GoMarkdownConverter();
    }
    return GoMarkdownConverter.instance;
  }

  /**
   * Converts an HTML string to Markdown using the Go library.
   *
   * @param html - The HTML to convert.
   * @returns A promise that resolves with the string produced by the binding,
   *   or rejects with the error the binding passes to its callback.
   */
  public async convertHTMLToMarkdown(html: string): Promise<string> {
    // The binding only offers a callback API, so it is wrapped in a Promise.
    return new Promise<string>((resolve, reject) => {
      this.convert.async(html, (err, res) => {
        if (err) {
          reject(err);
        } else {
          resolve(res);
        }
      });
    });
  }
}
|
||||||
|
|
||||||
export async function parseMarkdown(html: string): Promise<string> {
|
export async function parseMarkdown(html: string): Promise<string> {
|
||||||
if (!html) {
|
if (!html) {
|
||||||
return '';
|
return '';
|
||||||
@ -18,20 +47,8 @@ export async function parseMarkdown(html: string): Promise<string> {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
|
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
|
||||||
const goExecutablePath = join(__dirname, 'go-html-to-md/html-to-markdown.so');
|
const converter = GoMarkdownConverter.getInstance();
|
||||||
const lib = koffi.load(goExecutablePath);
|
let markdownContent = await converter.convertHTMLToMarkdown(html);
|
||||||
|
|
||||||
const convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
|
|
||||||
|
|
||||||
let markdownContent = await new Promise<string>((resolve, reject) => {
|
|
||||||
convert.async(html, (err: Error, res: string) => {
|
|
||||||
if (err) {
|
|
||||||
reject(err);
|
|
||||||
} else {
|
|
||||||
resolve(res);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
markdownContent = processMultiLineLinks(markdownContent);
|
markdownContent = processMultiLineLinks(markdownContent);
|
||||||
markdownContent = removeSkipToContentLinks(markdownContent);
|
markdownContent = removeSkipToContentLinks(markdownContent);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user