fix(WebScraper/sitemap): await urlsHandler to fix race condition

This commit is contained in:
Gergő Móricz 2024-12-30 16:09:14 +01:00
parent 8ae34a0d31
commit 71a8f7452c
2 changed files with 6 additions and 2 deletions

View File

@@ -506,7 +506,7 @@ export class WebCrawler {
// Get all links from the main domain's sitemap // Get all links from the main domain's sitemap
sitemapCount += await getLinksFromSitemap( sitemapCount += await getLinksFromSitemap(
{ sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) { { sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) {
urlsHandler(urls.filter(link => { return urlsHandler(urls.filter(link => {
try { try {
const linkUrl = new URL(link); const linkUrl = new URL(link);
return linkUrl.hostname.endsWith(hostname); return linkUrl.hostname.endsWith(hostname);

View File

@@ -103,7 +103,11 @@ export async function getLinksFromSitemap(
) )
.map((url) => url.loc[0]); .map((url) => url.loc[0]);
count += validUrls.length; count += validUrls.length;
urlsHandler(validUrls);
const h = urlsHandler(validUrls);
if (h instanceof Promise) {
await h;
}
} }
return count; return count;