From d5929af01094c1b324abe693b3d56a2fdf1af6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 17 Jan 2025 16:04:01 +0100 Subject: [PATCH] fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites) --- apps/api/src/lib/crawl-redis.ts | 5 +++++ apps/api/src/services/queue-worker.ts | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index 5b25969e..5868c28a 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -128,6 +128,7 @@ export async function isCrawlFinished(id: string) { return ( (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs")) + && (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null ); } @@ -135,6 +136,10 @@ export async function isCrawlFinishedLocked(id: string) { return await redisConnection.exists("crawl:" + id + ":finish"); } +export async function finishCrawlKickoff(id: string) { + await redisConnection.set("crawl:" + id + ":kickoff:finish", "yes", "EX", 24 * 60 * 60); +} + export async function finishCrawl(id: string) { if (await isCrawlFinished(id)) { _logger.debug("Marking crawl as finished.", { diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index c5b164f1..47f873ec 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -23,6 +23,7 @@ import { addCrawlJobs, crawlToCrawler, finishCrawl, + finishCrawlKickoff, generateURLPermutations, getCrawl, getCrawlJobCount, @@ -675,6 +676,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) { logger.debug("Done queueing jobs!"); + await finishCrawlKickoff(job.data.crawl_id); + await finishCrawlIfNeeded(job, sc); + return { success: true }; } catch (error) { logger.error("An error occurred!", { error });