diff --git a/apps/api/src/lib/concurrency-limit.ts b/apps/api/src/lib/concurrency-limit.ts index 8205113f..a2d27077 100644 --- a/apps/api/src/lib/concurrency-limit.ts +++ b/apps/api/src/lib/concurrency-limit.ts @@ -9,6 +9,7 @@ const constructQueueKey = (team_id: string) => const stalledJobTimeoutMs = 2 * 60 * 1000; export function getConcurrencyLimitMax(plan: string): number { + if (plan === "growth") return 100; return getRateLimiterPoints(RateLimiterMode.Scrape, undefined, plan); } diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index ea606f44..14ae5d71 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -401,13 +401,13 @@ export class WebCrawler { public async extractLinksFromHTML(html: string, url: string) { try { - return (await this.extractLinksFromHTMLRust(html, url)).map(x => { + return [...new Set((await this.extractLinksFromHTMLRust(html, url)).map(x => { try { return new URL(x, url).href } catch (e) { return null; } - }).filter(x => x !== null) as string[]; + }).filter(x => x !== null) as string[])]; } catch (error) { this.logger.error("Failed to call html-transformer! Falling back to cheerio...", { error,