mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 14:19:01 +08:00
fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites)
This commit is contained in:
parent
23bb172592
commit
d5929af010
@ -128,6 +128,7 @@ export async function isCrawlFinished(id: string) {
|
||||
return (
|
||||
(await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
|
||||
(await redisConnection.scard("crawl:" + id + ":jobs"))
|
||||
&& (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null
|
||||
);
|
||||
}
|
||||
|
||||
@ -135,6 +136,10 @@ export async function isCrawlFinishedLocked(id: string) {
|
||||
return await redisConnection.exists("crawl:" + id + ":finish");
|
||||
}
|
||||
|
||||
/**
 * Records in Redis that the kickoff stage of a crawl has completed.
 *
 * Writes the value "yes" under the key `crawl:<id>:kickoff:finish` with an
 * expiry of 24 hours, so the marker does not persist indefinitely.
 *
 * @param id - Crawl identifier used to build the Redis key.
 */
export async function finishCrawlKickoff(id: string) {
  const kickoffFinishKey = "crawl:" + id + ":kickoff:finish";
  // "EX" takes the time-to-live in seconds: 24h * 60min * 60s.
  const ttlSeconds = 24 * 60 * 60;
  await redisConnection.set(kickoffFinishKey, "yes", "EX", ttlSeconds);
}
|
||||
|
||||
export async function finishCrawl(id: string) {
|
||||
if (await isCrawlFinished(id)) {
|
||||
_logger.debug("Marking crawl as finished.", {
|
||||
|
@ -23,6 +23,7 @@ import {
|
||||
addCrawlJobs,
|
||||
crawlToCrawler,
|
||||
finishCrawl,
|
||||
finishCrawlKickoff,
|
||||
generateURLPermutations,
|
||||
getCrawl,
|
||||
getCrawlJobCount,
|
||||
@ -675,6 +676,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
|
||||
|
||||
logger.debug("Done queueing jobs!");
|
||||
|
||||
await finishCrawlKickoff(job.data.crawl_id);
|
||||
await finishCrawlIfNeeded(job, sc);
|
||||
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
logger.error("An error occurred!", { error });
|
||||
|
Loading…
x
Reference in New Issue
Block a user