From 71a8f7452cb5ba3777d80f953514c644d273cd60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Mon, 30 Dec 2024 16:09:14 +0100 Subject: [PATCH] fix(WebScraper/sitemap): await urlsHandler to fix race condition --- apps/api/src/scraper/WebScraper/crawler.ts | 2 +- apps/api/src/scraper/WebScraper/sitemap.ts | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 5695d284..9c49cf9b 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -506,7 +506,7 @@ export class WebCrawler { // Get all links from the main domain's sitemap sitemapCount += await getLinksFromSitemap( { sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) { - urlsHandler(urls.filter(link => { + return urlsHandler(urls.filter(link => { try { const linkUrl = new URL(link); return linkUrl.hostname.endsWith(hostname); diff --git a/apps/api/src/scraper/WebScraper/sitemap.ts b/apps/api/src/scraper/WebScraper/sitemap.ts index 8028d225..128b3f03 100644 --- a/apps/api/src/scraper/WebScraper/sitemap.ts +++ b/apps/api/src/scraper/WebScraper/sitemap.ts @@ -103,7 +103,11 @@ export async function getLinksFromSitemap( ) .map((url) => url.loc[0]); count += validUrls.length; - urlsHandler(validUrls); + + const h = urlsHandler(validUrls); + if (h instanceof Promise) { + await h; + } } return count;