mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 10:29:00 +08:00
fix(crawler): also take the hostname of the base url when determining isInternalLink
This commit is contained in:
parent
b82cfa8540
commit
35d1d85978
@@ -397,8 +397,7 @@ export class WebCrawler {

   private isInternalLink(link: string): boolean {
     const urlObj = new URL(link, this.baseUrl);
-    const baseDomain = this.baseUrl
-      .replace(/^https?:\/\//, "")
+    const baseDomain = new URL(this.baseUrl).hostname
       .replace(/^www\./, "")
       .trim();
     const linkDomain = urlObj.hostname.replace(/^www\./, "").trim();
Loading…
x
Reference in New Issue
Block a user