Add regex to detect relative links in the sitemap and prefix them with the base URL

This commit is contained in:
rafaelsideguide 2024-07-23 09:07:23 -03:00
parent 252bc09ee2
commit a684bd3c5d

View File

@@ -64,6 +64,14 @@ export class WebCrawler {
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks
.filter((link) => {
// if link is not a complete url, add the base url
link = link.trim();
const isCompleteUrl = new RegExp('^(?:[a-z+]+:)?//', 'i');
if (!isCompleteUrl.test(link)){
link = this.baseUrl + link;
}
const url = new URL(link);
const path = url.pathname;