From 35d1d85978ef840c0bba05f5c0201c0e2ac1d17e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=B3ricz=20Gerg=C5=91?=
Date: Tue, 7 Jan 2025 09:29:58 +0100
Subject: [PATCH] fix(crawler): also take the hostname of the base url when determining isInternalLink

---
 apps/api/src/scraper/WebScraper/crawler.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 9c49cf9b..dd949238 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -397,8 +397,7 @@ export class WebCrawler {
 
   private isInternalLink(link: string): boolean {
     const urlObj = new URL(link, this.baseUrl);
-    const baseDomain = this.baseUrl
-      .replace(/^https?:\/\//, "")
+    const baseDomain = new URL(this.baseUrl).hostname
       .replace(/^www\./, "")
       .trim();
     const linkDomain = urlObj.hostname.replace(/^www\./, "").trim();