From c7ae50d2d0be9aef9478b74c766a32d3edfb568b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Thu, 13 Mar 2025 19:31:05 +0100
Subject: [PATCH] fix(crawler): sitemaps poisoning crawls with unrelated links (#1334)

---
 apps/api/src/scraper/WebScraper/crawler.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index ba4793d8..29a35e3b 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -271,7 +271,7 @@ export class WebCrawler {
         return urlsHandler(urls);
       } else {
         let filteredLinks = this.filterLinks(
-          [...new Set(urls)],
+          [...new Set(urls)].filter(x => this.filterURL(x, this.initialUrl) !== null),
           leftOfLimit,
           this.maxCrawledDepth,
           fromMap,