mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-19 16:39:08 +08:00
minor spacing and comment stuff
This commit is contained in:
parent
a5fb45988c
commit
4d6e25619b
@ -286,8 +286,6 @@ export class WebCrawler {
|
|||||||
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
|
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
|
||||||
if (this.isInternalLink(fullUrl) &&
|
if (this.isInternalLink(fullUrl) &&
|
||||||
this.noSections(fullUrl) &&
|
this.noSections(fullUrl) &&
|
||||||
// The idea here to comment this out is to allow wider website coverage as we filter this anyway afterwards
|
|
||||||
// this.matchesIncludes(path) &&
|
|
||||||
!this.matchesExcludes(path) &&
|
!this.matchesExcludes(path) &&
|
||||||
this.isRobotsAllowed(fullUrl)
|
this.isRobotsAllowed(fullUrl)
|
||||||
) {
|
) {
|
||||||
@ -295,7 +293,7 @@ export class WebCrawler {
|
|||||||
}
|
}
|
||||||
} else { // EXTERNAL LINKS
|
} else { // EXTERNAL LINKS
|
||||||
if (
|
if (
|
||||||
this.isInternalLink(url) && //its avoid to add links from external pages on the queue
|
this.isInternalLink(url) &&
|
||||||
this.allowExternalContentLinks &&
|
this.allowExternalContentLinks &&
|
||||||
!this.isSocialMediaOrEmail(fullUrl) &&
|
!this.isSocialMediaOrEmail(fullUrl) &&
|
||||||
!this.matchesExcludes(fullUrl, true) &&
|
!this.matchesExcludes(fullUrl, true) &&
|
||||||
|
Loading…
x
Reference in New Issue
Block a user