mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:25:57 +08:00
fix(WebScraper/sitemap): await urlsHandler to fix race condition
This commit is contained in:
parent
8ae34a0d31
commit
71a8f7452c
@ -506,7 +506,7 @@ export class WebCrawler {
|
|||||||
// Get all links from the main domain's sitemap
|
// Get all links from the main domain's sitemap
|
||||||
sitemapCount += await getLinksFromSitemap(
|
sitemapCount += await getLinksFromSitemap(
|
||||||
{ sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) {
|
{ sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) {
|
||||||
urlsHandler(urls.filter(link => {
|
return urlsHandler(urls.filter(link => {
|
||||||
try {
|
try {
|
||||||
const linkUrl = new URL(link);
|
const linkUrl = new URL(link);
|
||||||
return linkUrl.hostname.endsWith(hostname);
|
return linkUrl.hostname.endsWith(hostname);
|
||||||
|
@ -103,7 +103,11 @@ export async function getLinksFromSitemap(
|
|||||||
)
|
)
|
||||||
.map((url) => url.loc[0]);
|
.map((url) => url.loc[0]);
|
||||||
count += validUrls.length;
|
count += validUrls.length;
|
||||||
urlsHandler(validUrls);
|
|
||||||
|
const h = urlsHandler(validUrls);
|
||||||
|
if (h instanceof Promise) {
|
||||||
|
await h;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user