diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts index 084685c7..b753b17b 100644 --- a/apps/api/src/controllers/v1/crawl-status.ts +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -60,12 +60,24 @@ export async function crawlStatusController(req: RequestWithAuth [x, await getScrapeQueue().getJobState(x)] as const)); const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id)); - jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed - // filter out failed jobs - jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed")); - // filter the job statues - jobStatuses = jobStatuses.filter(x => x[1] !== "failed" && x[1] !== "unknown"); - const status: Exclude["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping"; + + const throttledJobsSet = new Set(throttledJobs); + + const validJobStatuses = []; + const validJobIDs = []; + + for (const [id, status] of jobStatuses) { + if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") { + validJobStatuses.push([id, status]); + validJobIDs.push(id); + } + } + + const status: Exclude["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping"; + + // Use validJobIDs instead of jobIDs for further processing + jobIDs = validJobIDs; + const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId); const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);