mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 20:19:11 +08:00
fix(crawler): sitemaps poisoning crawls with unrelated links (#1334)
This commit is contained in:
parent
da6b7505f3
commit
c7ae50d2d0
@@ -271,7 +271,7 @@ export class WebCrawler {
|
|||||||
return urlsHandler(urls);
|
return urlsHandler(urls);
|
||||||
} else {
|
} else {
|
||||||
let filteredLinks = this.filterLinks(
|
let filteredLinks = this.filterLinks(
|
||||||
[...new Set(urls)],
|
[...new Set(urls)].filter(x => this.filterURL(x, this.initialUrl) !== null),
|
||||||
leftOfLimit,
|
leftOfLimit,
|
||||||
this.maxCrawledDepth,
|
this.maxCrawledDepth,
|
||||||
fromMap,
|
fromMap,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user