fix(crawler): sitemaps poisoning crawls with unrelated links (#1334)

This commit is contained in:
Gergő Móricz 2025-03-13 19:31:05 +01:00 committed by GitHub
parent da6b7505f3
commit c7ae50d2d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -271,7 +271,7 @@ export class WebCrawler {
return urlsHandler(urls);
} else {
let filteredLinks = this.filterLinks(
-[...new Set(urls)],
+[...new Set(urls)].filter(x => this.filterURL(x, this.initialUrl) !== null),
leftOfLimit,
this.maxCrawledDepth,
fromMap,