mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 16:20:48 +08:00
Merge pull request #449 from mendableai/bugfix/malformed-url-sitemap
Added regex for links in sitemap
This commit is contained in:
commit
2c1221750b
@@ -64,7 +64,7 @@ export class WebCrawler {
|
||||
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
|
||||
return sitemapLinks
|
||||
.filter((link) => {
|
||||
const url = new URL(link);
|
||||
const url = new URL(link.trim(), this.baseUrl);
|
||||
const path = url.pathname;
|
||||
|
||||
const depth = getURLDepth(url.toString());
|
||||
|
Loading…
x
Reference in New Issue
Block a user