mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 03:09:11 +08:00
parent
6bed5eca50
commit
570809aa59
@ -118,7 +118,19 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
|
|||||||
|
|
||||||
const lastUrlsSet = new Set(lastUrls);
|
const lastUrlsSet = new Set(lastUrls);
|
||||||
|
|
||||||
const univistedUrls = Array.from(lastUrlsSet).filter(x => !visitedUrls.has(x));
|
const crawler = crawlToCrawler(
|
||||||
|
job.data.crawl_id,
|
||||||
|
sc,
|
||||||
|
sc.originUrl!,
|
||||||
|
job.data.crawlerOptions,
|
||||||
|
);
|
||||||
|
|
||||||
|
const univistedUrls = crawler.filterLinks(
|
||||||
|
Array.from(lastUrlsSet).filter(x => !visitedUrls.has(x)),
|
||||||
|
Infinity,
|
||||||
|
sc.crawlerOptions.maxDepth ?? 10,
|
||||||
|
);
|
||||||
|
|
||||||
const addableJobCount = sc.crawlerOptions.limit === undefined ? Infinity : (sc.crawlerOptions.limit - await getDoneJobsOrderedLength(job.data.crawl_id));
|
const addableJobCount = sc.crawlerOptions.limit === undefined ? Infinity : (sc.crawlerOptions.limit - await getDoneJobsOrderedLength(job.data.crawl_id));
|
||||||
|
|
||||||
console.log(sc.originUrl!, univistedUrls, visitedUrls, lastUrls, addableJobCount);
|
console.log(sc.originUrl!, univistedUrls, visitedUrls, lastUrls, addableJobCount);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user