fix(crawler/rust): dedupe

This commit is contained in:
Móricz Gergő 2025-01-30 08:16:51 +01:00
parent c88176a596
commit a7eb2f7c6a

View File

@@ -401,13 +401,13 @@ export class WebCrawler {
public async extractLinksFromHTML(html: string, url: string) {
try {
- return (await this.extractLinksFromHTMLRust(html, url)).map(x => {
+ return [...new Set((await this.extractLinksFromHTMLRust(html, url)).map(x => {
try {
return new URL(x, url).href
} catch (e) {
return null;
}
- }).filter(x => x !== null) as string[];
+ }).filter(x => x !== null) as string[])];
} catch (error) {
this.logger.error("Failed to call html-transformer! Falling back to cheerio...", {
error,