mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 18:10:46 +08:00
Merge pull request #779 from mendableai/fix/check-files
[BUG] added check files on crawl
This commit is contained in:
commit
ca84491ccb
@ -136,6 +136,10 @@ export class WebCrawler {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.isFile(link)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
})
|
})
|
||||||
.slice(0, limit);
|
.slice(0, limit);
|
||||||
@ -478,7 +482,14 @@ export class WebCrawler {
|
|||||||
".webp",
|
".webp",
|
||||||
".inc"
|
".inc"
|
||||||
];
|
];
|
||||||
return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
|
|
||||||
|
try {
|
||||||
|
const urlWithoutQuery = url.split('?')[0].toLowerCase();
|
||||||
|
return fileExtensions.some((ext) => urlWithoutQuery.endsWith(ext));
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(`Error processing URL in isFile: ${error}`);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private isSocialMediaOrEmail(url: string): boolean {
|
private isSocialMediaOrEmail(url: string): boolean {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user