mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 23:15:55 +08:00
Bug fix: the crawl should not stop if a sitemap URL is invalid.
This commit is contained in:
parent
0dce57832d
commit
e1c9cbf709
@ -69,7 +69,13 @@ export class WebCrawler {
|
|||||||
public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
|
public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
|
||||||
return sitemapLinks
|
return sitemapLinks
|
||||||
.filter((link) => {
|
.filter((link) => {
|
||||||
const url = new URL(link.trim(), this.baseUrl);
|
let url: URL;
|
||||||
|
try {
|
||||||
|
url = new URL(link.trim(), this.baseUrl);
|
||||||
|
} catch (error) {
|
||||||
|
Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const path = url.pathname;
|
const path = url.pathname;
|
||||||
|
|
||||||
const depth = getURLDepth(url.toString());
|
const depth = getURLDepth(url.toString());
|
||||||
|
@ -73,7 +73,6 @@ export async function scrapWithFireEngine({
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (pageOptions?.useFastMode) {
|
if (pageOptions?.useFastMode) {
|
||||||
console.log('using tlsclient')
|
|
||||||
fireEngineOptionsParam.engine = "tlsclient";
|
fireEngineOptionsParam.engine = "tlsclient";
|
||||||
engine = "tlsclient";
|
engine = "tlsclient";
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user