mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 20:39:00 +08:00
fix(crawler): also take the hostname of the base url when determining isInternalLink
This commit is contained in:
parent
b82cfa8540
commit
35d1d85978
@ -397,8 +397,7 @@ export class WebCrawler {
|
|||||||
|
|
||||||
private isInternalLink(link: string): boolean {
|
private isInternalLink(link: string): boolean {
|
||||||
const urlObj = new URL(link, this.baseUrl);
|
const urlObj = new URL(link, this.baseUrl);
|
||||||
const baseDomain = this.baseUrl
|
const baseDomain = new URL(this.baseUrl).hostname
|
||||||
.replace(/^https?:\/\//, "")
|
|
||||||
.replace(/^www\./, "")
|
.replace(/^www\./, "")
|
||||||
.trim();
|
.trim();
|
||||||
const linkDomain = urlObj.hostname.replace(/^www\./, "").trim();
|
const linkDomain = urlObj.hostname.replace(/^www\./, "").trim();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user