fix(queue-worker/crawl): only report successful page count in num_docs (#1179)

This commit is contained in:
Gergő Móricz 2025-02-13 17:14:24 +01:00 committed by GitHub
parent 584221a106
commit 73e7884df4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -29,6 +29,7 @@ import {
  getCrawl,
  getCrawlJobCount,
  getCrawlJobs,
+ getDoneJobsOrderedLength,
  lockURL,
  lockURLs,
  lockURLsIndividually,
@@ -185,7 +186,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
  );
  }
  } else {
- const num_docs = await getCrawlJobCount(job.data.crawl_id);
+ const num_docs = await getDoneJobsOrderedLength(job.data.crawl_id);
  const jobStatus = sc.cancelled ? "failed" : "completed";
  await logJob(