fix(crawler): also take the hostname of the base url when determining isInternalLink

This commit is contained in:
Móricz Gergő 2025-01-07 09:29:58 +01:00
parent b82cfa8540
commit 35d1d85978

View File

@@ -397,8 +397,7 @@ export class WebCrawler {
private isInternalLink(link: string): boolean { private isInternalLink(link: string): boolean {
const urlObj = new URL(link, this.baseUrl); const urlObj = new URL(link, this.baseUrl);
const baseDomain = this.baseUrl const baseDomain = new URL(this.baseUrl).hostname
.replace(/^https?:\/\//, "")
.replace(/^www\./, "") .replace(/^www\./, "")
.trim(); .trim();
const linkDomain = urlObj.hostname.replace(/^www\./, "").trim(); const linkDomain = urlObj.hostname.replace(/^www\./, "").trim();