fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites)

This commit is contained in:
Gergő Móricz 2025-01-17 16:04:01 +01:00
parent 23bb172592
commit d5929af010
2 changed files with 9 additions and 0 deletions

View File

@ -128,6 +128,7 @@ export async function isCrawlFinished(id: string) {
return ( return (
(await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
(await redisConnection.scard("crawl:" + id + ":jobs")) (await redisConnection.scard("crawl:" + id + ":jobs"))
&& (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null
); );
} }
@ -135,6 +136,10 @@ export async function isCrawlFinishedLocked(id: string) {
return await redisConnection.exists("crawl:" + id + ":finish"); return await redisConnection.exists("crawl:" + id + ":finish");
} }
/**
 * Records that the kickoff phase of a crawl has completed.
 *
 * Stores the value "yes" under the key `crawl:<id>:kickoff:finish`, passing
 * the `EX` option so the marker expires on its own after one day.
 *
 * @param id - Identifier of the crawl whose kickoff has finished.
 */
export async function finishCrawlKickoff(id: string) {
  // 24 hours, expressed in seconds for the `EX` expiry option.
  const markerTtlSeconds = 24 * 60 * 60;
  const kickoffFinishKey = `crawl:${id}:kickoff:finish`;

  await redisConnection.set(kickoffFinishKey, "yes", "EX", markerTtlSeconds);
}
export async function finishCrawl(id: string) { export async function finishCrawl(id: string) {
if (await isCrawlFinished(id)) { if (await isCrawlFinished(id)) {
_logger.debug("Marking crawl as finished.", { _logger.debug("Marking crawl as finished.", {

View File

@ -23,6 +23,7 @@ import {
addCrawlJobs, addCrawlJobs,
crawlToCrawler, crawlToCrawler,
finishCrawl, finishCrawl,
finishCrawlKickoff,
generateURLPermutations, generateURLPermutations,
getCrawl, getCrawl,
getCrawlJobCount, getCrawlJobCount,
@ -675,6 +676,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
logger.debug("Done queueing jobs!"); logger.debug("Done queueing jobs!");
await finishCrawlKickoff(job.data.crawl_id);
await finishCrawlIfNeeded(job, sc);
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
logger.error("An error occurred!", { error }); logger.error("An error occurred!", { error });