fix(crawl): allow execution time longer than 24h

Gergő Móricz 2025-03-17 18:04:05 +01:00
parent 7e7b7e10fe
commit d12feaea52
2 changed files with 11 additions and 13 deletions
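
The change drops the "NX" flag from every expire call and adds expire calls on the read paths, so each access pushes the 24 h TTL forward instead of leaving the original deadline in place. Below is a minimal sketch of the EXPIRE semantics involved, assuming ioredis and Redis 7+ (the "crawl:example" key is made up for illustration):

import Redis from "ioredis";

const redis = new Redis();

async function demo(): Promise<void> {
  await redis.set("crawl:example", "{}");

  // With the NX flag, EXPIRE only applies when the key has no TTL yet, so
  // repeated calls never push the deadline back: the state disappears exactly
  // 24 h after creation, however active the crawl still is.
  await redis.expire("crawl:example", 24 * 60 * 60, "NX"); // sets the TTL
  await redis.expire("crawl:example", 24 * 60 * 60, "NX"); // no-op, TTL unchanged

  // Without NX (what this commit switches to), every call resets the TTL to a
  // full 24 h, so each read or write extends the crawl state's lifetime.
  await redis.expire("crawl:example", 24 * 60 * 60);
  console.log(await redis.ttl("crawl:example")); // back to ~86400

  await redis.quit();
}

demo().catch(console.error);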


@@ -27,7 +27,7 @@ export async function saveCrawl(id: string, crawl: StoredCrawl) {
     plan: crawl.plan,
   });
   await redisConnection.set("crawl:" + id, JSON.stringify(crawl));
-  await redisConnection.expire("crawl:" + id, 24 * 60 * 60, "NX");
+  await redisConnection.expire("crawl:" + id, 24 * 60 * 60);
 }
 
 export async function getCrawl(id: string): Promise<StoredCrawl | null> {
@@ -37,6 +37,7 @@ export async function getCrawl(id: string): Promise<StoredCrawl | null> {
     return null;
   }
 
+  await redisConnection.expire("crawl:" + id, 24 * 60 * 60);
   return JSON.parse(x);
 }
@@ -56,7 +57,7 @@ export async function addCrawlJob(id: string, job_id: string) {
     crawlId: id,
   });
   await redisConnection.sadd("crawl:" + id + ":jobs", job_id);
-  await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
+  await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60);
 }
 
 export async function addCrawlJobs(id: string, job_ids: string[]) {
@@ -69,7 +70,7 @@ export async function addCrawlJobs(id: string, job_ids: string[]) {
     crawlId: id,
   });
   await redisConnection.sadd("crawl:" + id + ":jobs", ...job_ids);
-  await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
+  await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60);
 }
 
 export async function addCrawlJobDone(
@@ -87,7 +88,6 @@ export async function addCrawlJobDone(
   await redisConnection.expire(
     "crawl:" + id + ":jobs_done",
     24 * 60 * 60,
-    "NX",
   );
 
   if (success) {
@@ -104,11 +104,11 @@ export async function addCrawlJobDone(
   await redisConnection.expire(
     "crawl:" + id + ":jobs_done_ordered",
     24 * 60 * 60,
-    "NX",
   );
 }
 
 export async function getDoneJobsOrderedLength(id: string): Promise<number> {
+  await redisConnection.expire("crawl:" + id + ":jobs_done_ordered", 24 * 60 * 60);
   return await redisConnection.llen("crawl:" + id + ":jobs_done_ordered");
 }
@@ -117,6 +117,7 @@ export async function getDoneJobsOrdered(
   start = 0,
   end = -1,
 ): Promise<string[]> {
+  await redisConnection.expire("crawl:" + id + ":jobs_done_ordered", 24 * 60 * 60);
   return await redisConnection.lrange(
     "crawl:" + id + ":jobs_done_ordered",
     start,
@@ -125,6 +126,7 @@ export async function getDoneJobsOrdered(
 }
 
 export async function isCrawlFinished(id: string) {
+  await redisConnection.expire("crawl:" + id + ":kickoff:finish", 24 * 60 * 60);
   return (
     (await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
       (await redisConnection.scard("crawl:" + id + ":jobs")) &&
@@ -133,6 +135,7 @@ export async function isCrawlFinished(id: string) {
 }
 
 export async function isCrawlKickoffFinished(id: string) {
+  await redisConnection.expire("crawl:" + id + ":kickoff:finish", 24 * 60 * 60);
   return (
     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null
   );
@@ -159,9 +162,7 @@ export async function finishCrawl(id: string) {
       crawlId: id,
     });
     const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes");
-    if (set === 1) {
-      await redisConnection.expire("crawl:" + id + ":finish", 24 * 60 * 60);
-    }
+    await redisConnection.expire("crawl:" + id + ":finish", 24 * 60 * 60);
     return set === 1;
   } else {
     _logger.debug("Crawl can not be finished yet, not marking as finished.", {
@@ -294,14 +295,13 @@ export async function lockURL(
     res = x === permutations.length;
   }
 
-  await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
+  await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60);
 
   if (res) {
     await redisConnection.sadd("crawl:" + id + ":visited_unique", url);
     await redisConnection.expire(
       "crawl:" + id + ":visited_unique",
       24 * 60 * 60,
-      "NX",
     );
   }
@@ -334,7 +334,6 @@ export async function lockURLs(
   await redisConnection.expire(
     "crawl:" + id + ":visited_unique",
     24 * 60 * 60,
-    "NX",
   );
 
   let res: boolean;
@@ -353,7 +352,7 @@ export async function lockURLs(
     res = x === allPermutations.length;
   }
 
-  await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
+  await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60);
 
   logger.debug("lockURLs final result: " + res, { res });
   return res;


@@ -384,7 +384,6 @@ export class WebCrawler {
       await redisConnection.expire(
         "crawl:" + this.jobId + ":robots_blocked",
         24 * 60 * 60,
-        "NX",
       );
     })();
   }
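
Because the getters now refresh the TTL as well, any read of crawl state within a 24 h window keeps it alive, which is what allows crawls to run longer than 24 hours. A hypothetical helper sketching that sliding-window effect (ioredis assumed; the key names mirror the ones touched in the diff, the helper itself is not part of this commit):

import Redis from "ioredis";

const redis = new Redis();

// Refresh the 24 h window on a crawl's keys, the same way the patched
// getters and setters above do on every access. As long as something touches
// the crawl at least once per 24 h, its Redis state never expires.
// e.g. call touchCrawl(crawlId) from any periodic status poll.
async function touchCrawl(id: string): Promise<void> {
  const ttl = 24 * 60 * 60;
  await redis.expire("crawl:" + id, ttl);
  await redis.expire("crawl:" + id + ":jobs", ttl);
  await redis.expire("crawl:" + id + ":jobs_done_ordered", ttl);
}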