diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index ea606f44..14ae5d71 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -401,13 +401,13 @@ export class WebCrawler { public async extractLinksFromHTML(html: string, url: string) { try { - return (await this.extractLinksFromHTMLRust(html, url)).map(x => { + return [...new Set((await this.extractLinksFromHTMLRust(html, url)).map(x => { try { return new URL(x, url).href } catch (e) { return null; } - }).filter(x => x !== null) as string[]; + }).filter(x => x !== null) as string[])]; } catch (error) { this.logger.error("Failed to call html-transformer! Falling back to cheerio...", { error,