mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 15:49:02 +08:00
fix(crawler): check for more strings
This commit is contained in:
parent
4902d0ac64
commit
c22c87ab0a
@ -179,7 +179,7 @@ export class WebCrawler {
|
|||||||
|
|
||||||
const isAllowed = this.ignoreRobotsTxt
|
const isAllowed = this.ignoreRobotsTxt
|
||||||
? true
|
? true
|
||||||
: (this.robots.isAllowed(link, "FireCrawlAgent") ?? true);
|
: ((this.robots.isAllowed(link, "FireCrawlAgent") || this.robots.isAllowed(link, "FirecrawlAgent")) ?? true);
|
||||||
// Check if the link is disallowed by robots.txt
|
// Check if the link is disallowed by robots.txt
|
||||||
if (!isAllowed) {
|
if (!isAllowed) {
|
||||||
this.logger.debug(`Link disallowed by robots.txt: ${link}`, {
|
this.logger.debug(`Link disallowed by robots.txt: ${link}`, {
|
||||||
@ -453,7 +453,7 @@ export class WebCrawler {
|
|||||||
return ignoreRobotsTxt
|
return ignoreRobotsTxt
|
||||||
? true
|
? true
|
||||||
: this.robots
|
: this.robots
|
||||||
? (this.robots.isAllowed(url, "FireCrawlAgent") ?? true)
|
? ((this.robots.isAllowed(url, "FireCrawlAgent") || this.robots.isAllowed(url, "FirecrawlAgent")) ?? true)
|
||||||
: true;
|
: true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user