Bug fix: the crawl should not stop when a sitemap URL is invalid.

This commit is contained in:
rafaelsideguide 2024-08-20 09:11:58 -03:00
parent 0dce57832d
commit e1c9cbf709
2 changed files with 7 additions and 2 deletions

View File

@ -69,7 +69,13 @@ export class WebCrawler {
public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks
.filter((link) => {
const url = new URL(link.trim(), this.baseUrl);
let url: URL;
try {
url = new URL(link.trim(), this.baseUrl);
} catch (error) {
Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
return false;
}
const path = url.pathname;
const depth = getURLDepth(url.toString());

View File

@ -73,7 +73,6 @@ export async function scrapWithFireEngine({
);
if (pageOptions?.useFastMode) {
console.log('using tlsclient')
fireEngineOptionsParam.engine = "tlsclient";
engine = "tlsclient";
}