mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 01:59:04 +08:00
fix: filter out invalid URLs from crawl links
This commit is contained in:
parent
dae1408e66
commit
55009e51f5
@@ -267,9 +267,18 @@ export class WebCrawler {
|
||||
public filterURL(href: string, url: string): string | null {
|
||||
let fullUrl = href;
|
||||
if (!href.startsWith("http")) {
|
||||
fullUrl = new URL(href, this.baseUrl).toString();
|
||||
try {
|
||||
fullUrl = new URL(href, this.baseUrl).toString();
|
||||
} catch (_) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
let urlObj;
|
||||
try {
|
||||
urlObj = new URL(fullUrl);
|
||||
} catch (_) {
|
||||
return null;
|
||||
}
|
||||
const urlObj = new URL(fullUrl);
|
||||
const path = urlObj.pathname;
|
||||
|
||||
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
|
||||
|
Loading…
x
Reference in New Issue
Block a user