From 0591000b64ad586a4d24c74298a5167bc296726e Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:30:41 -0300 Subject: [PATCH] bugfix includes excludes --- apps/api/src/scraper/WebScraper/index.ts | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index e667fa6b..859127bd 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -168,11 +168,29 @@ export class WebScraperDataProvider { private async handleCrawlMode( inProgress?: (progress: Progress) => void ): Promise { + let includes: string[]; + if (Array.isArray(this.includes)) { + if (this.includes[0] != "") { + includes = this.includes; + } + } else { + includes = this.includes.split(','); + } + + let excludes: string[]; + if (Array.isArray(this.excludes)) { + if (this.excludes[0] != "") { + excludes = this.excludes; + } + } else { + excludes = this.excludes.split(','); + } + const crawler = new WebCrawler({ jobId: this.jobId, initialUrl: this.urls[0], - includes: Array.isArray(this.includes) ? this.includes : this.includes.split(','), - excludes: Array.isArray(this.excludes) ? this.excludes : this.excludes.split(','), + includes, + excludes, maxCrawledLinks: this.maxCrawledLinks, maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth), limit: this.limit,