mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 04:36:02 +08:00
fix(crawler): verify URL
This commit is contained in:
parent
e690a6fda7
commit
8e3c2b2855
@@ -108,7 +108,12 @@ export class WebCrawler {
 
 // Normalize the initial URL and the link to account for www and non-www versions
 const normalizedInitialUrl = new URL(this.initialUrl);
-const normalizedLink = new URL(link);
+let normalizedLink;
+try {
+  normalizedLink = new URL(link);
+} catch (_) {
+  return false;
+}
 const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, '');
 const linkHostname = normalizedLink.hostname.replace(/^www\./, '');
 
|
Loading…
x
Reference in New Issue
Block a user