fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites)

This commit is contained in:
Gergő Móricz 2025-01-17 16:04:01 +01:00
parent 23bb172592
commit d5929af010
2 changed files with 9 additions and 0 deletions

View File

@@ -128,6 +128,7 @@ export async function isCrawlFinished(id: string) {
return (
(await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
(await redisConnection.scard("crawl:" + id + ":jobs"))
&& (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null
);
}
@@ -135,6 +136,10 @@ export async function isCrawlFinishedLocked(id: string) {
return await redisConnection.exists("crawl:" + id + ":finish");
}
/**
 * Records in Redis that the kickoff job for a crawl has finished.
 *
 * Writes the value "yes" under `crawl:<id>:kickoff:finish` with an expiry
 * of 24 hours. `isCrawlFinished` reads this same key and only reports a
 * crawl as finished once it is present.
 *
 * @param id - Identifier of the crawl whose kickoff has completed.
 */
export async function finishCrawlKickoff(id: string) {
  const kickoffFinishKey = `crawl:${id}:kickoff:finish`;
  // Expiry is passed to Redis in seconds ("EX"): 24 hours.
  const ttlSeconds = 24 * 60 * 60;
  await redisConnection.set(kickoffFinishKey, "yes", "EX", ttlSeconds);
}
export async function finishCrawl(id: string) {
if (await isCrawlFinished(id)) {
_logger.debug("Marking crawl as finished.", {

View File

@@ -23,6 +23,7 @@ import {
addCrawlJobs,
crawlToCrawler,
finishCrawl,
finishCrawlKickoff,
generateURLPermutations,
getCrawl,
getCrawlJobCount,
@@ -675,6 +676,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
logger.debug("Done queueing jobs!");
await finishCrawlKickoff(job.data.crawl_id);
await finishCrawlIfNeeded(job, sc);
return { success: true };
} catch (error) {
logger.error("An error occurred!", { error });