mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 23:15:55 +08:00
Nick: fetch sitemap timeout param
This commit is contained in:
parent
24ddcd4a6d
commit
2e5785d8d9
@ -100,6 +100,7 @@ export async function getMapResults({
|
|||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
true,
|
true,
|
||||||
|
30000
|
||||||
);
|
);
|
||||||
if (sitemap > 0) {
|
if (sitemap > 0) {
|
||||||
links = links
|
links = links
|
||||||
|
@ -202,6 +202,7 @@ export class WebCrawler {
|
|||||||
urlsHandler: (urls: string[]) => unknown,
|
urlsHandler: (urls: string[]) => unknown,
|
||||||
fromMap: boolean = false,
|
fromMap: boolean = false,
|
||||||
onlySitemap: boolean = false,
|
onlySitemap: boolean = false,
|
||||||
|
timeout: number = 120000,
|
||||||
): Promise<number> {
|
): Promise<number> {
|
||||||
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
|
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
|
||||||
method: "tryGetSitemap",
|
method: "tryGetSitemap",
|
||||||
@ -250,13 +251,21 @@ export class WebCrawler {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let count = (await Promise.all([
|
const timeoutPromise = new Promise((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error('Sitemap fetch timeout')), timeout);
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
let count = await Promise.race([
|
||||||
|
Promise.all([
|
||||||
this.tryFetchSitemapLinks(
|
this.tryFetchSitemapLinks(
|
||||||
this.initialUrl,
|
this.initialUrl,
|
||||||
_urlsHandler,
|
_urlsHandler,
|
||||||
),
|
),
|
||||||
...this.robots.getSitemaps().map(x => this.tryFetchSitemapLinks(x, _urlsHandler)),
|
...this.robots.getSitemaps().map(x => this.tryFetchSitemapLinks(x, _urlsHandler)),
|
||||||
])).reduce((a,x) => a+x, 0);
|
]).then(results => results.reduce((a,x) => a+x, 0)),
|
||||||
|
timeoutPromise
|
||||||
|
]) as number;
|
||||||
|
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
if (
|
if (
|
||||||
@ -271,6 +280,20 @@ export class WebCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
|
} catch (error) {
|
||||||
|
if (error.message === 'Sitemap fetch timeout') {
|
||||||
|
this.logger.warn('Sitemap fetch timed out', {
|
||||||
|
method: "tryGetSitemap",
|
||||||
|
timeout,
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
this.logger.error('Error fetching sitemap', {
|
||||||
|
method: "tryGetSitemap",
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public filterURL(href: string, url: string): string | null {
|
public filterURL(href: string, url: string): string | null {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user