diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 59b53642..00d51853 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -64,6 +64,14 @@ export class WebCrawler { private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] { return sitemapLinks .filter((link) => { + + // if link is not a complete url, add the base url + link = link.trim(); + const isCompleteUrl = new RegExp('^(?:[a-z+]+:)?//', 'i'); + if (!isCompleteUrl.test(link)){ + link = this.baseUrl + link; + } + const url = new URL(link); const path = url.pathname;