mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 16:28:59 +08:00
fix(queue-worker/kickoff): make crawls wait for kickoff to finish (matters on big sitemapped sites)
This commit is contained in:
parent
23bb172592
commit
d5929af010
@ -128,6 +128,7 @@ export async function isCrawlFinished(id: string) {
|
|||||||
return (
|
return (
|
||||||
(await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
|
(await redisConnection.scard("crawl:" + id + ":jobs_done")) ===
|
||||||
(await redisConnection.scard("crawl:" + id + ":jobs"))
|
(await redisConnection.scard("crawl:" + id + ":jobs"))
|
||||||
|
&& (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,6 +136,10 @@ export async function isCrawlFinishedLocked(id: string) {
|
|||||||
return await redisConnection.exists("crawl:" + id + ":finish");
|
return await redisConnection.exists("crawl:" + id + ":finish");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function finishCrawlKickoff(id: string) {
|
||||||
|
await redisConnection.set("crawl:" + id + ":kickoff:finish", "yes", "EX", 24 * 60 * 60);
|
||||||
|
}
|
||||||
|
|
||||||
export async function finishCrawl(id: string) {
|
export async function finishCrawl(id: string) {
|
||||||
if (await isCrawlFinished(id)) {
|
if (await isCrawlFinished(id)) {
|
||||||
_logger.debug("Marking crawl as finished.", {
|
_logger.debug("Marking crawl as finished.", {
|
||||||
|
@ -23,6 +23,7 @@ import {
|
|||||||
addCrawlJobs,
|
addCrawlJobs,
|
||||||
crawlToCrawler,
|
crawlToCrawler,
|
||||||
finishCrawl,
|
finishCrawl,
|
||||||
|
finishCrawlKickoff,
|
||||||
generateURLPermutations,
|
generateURLPermutations,
|
||||||
getCrawl,
|
getCrawl,
|
||||||
getCrawlJobCount,
|
getCrawlJobCount,
|
||||||
@ -675,6 +676,9 @@ async function processKickoffJob(job: Job & { id: string }, token: string) {
|
|||||||
|
|
||||||
logger.debug("Done queueing jobs!");
|
logger.debug("Done queueing jobs!");
|
||||||
|
|
||||||
|
await finishCrawlKickoff(job.data.crawl_id);
|
||||||
|
await finishCrawlIfNeeded(job, sc);
|
||||||
|
|
||||||
return { success: true };
|
return { success: true };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error("An error occurred!", { error });
|
logger.error("An error occurred!", { error });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user