mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
a585340740
@ -9,6 +9,7 @@ const constructQueueKey = (team_id: string) =>
|
|||||||
const stalledJobTimeoutMs = 2 * 60 * 1000;
|
const stalledJobTimeoutMs = 2 * 60 * 1000;
|
||||||
|
|
||||||
export function getConcurrencyLimitMax(plan: string): number {
|
export function getConcurrencyLimitMax(plan: string): number {
|
||||||
|
if (plan === "growth") return 100;
|
||||||
return getRateLimiterPoints(RateLimiterMode.Scrape, undefined, plan);
|
return getRateLimiterPoints(RateLimiterMode.Scrape, undefined, plan);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -401,13 +401,13 @@ export class WebCrawler {
|
|||||||
|
|
||||||
public async extractLinksFromHTML(html: string, url: string) {
|
public async extractLinksFromHTML(html: string, url: string) {
|
||||||
try {
|
try {
|
||||||
return (await this.extractLinksFromHTMLRust(html, url)).map(x => {
|
return [...new Set((await this.extractLinksFromHTMLRust(html, url)).map(x => {
|
||||||
try {
|
try {
|
||||||
return new URL(x, url).href
|
return new URL(x, url).href
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}).filter(x => x !== null) as string[];
|
}).filter(x => x !== null) as string[])];
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.error("Failed to call html-transformer! Falling back to cheerio...", {
|
this.logger.error("Failed to call html-transformer! Falling back to cheerio...", {
|
||||||
error,
|
error,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user