fix(queue-worker/crawl): only report successful page count in num_docs (#1179)

This commit is contained in:
Gergő Móricz 2025-02-13 17:14:24 +01:00 committed by GitHub
parent 584221a106
commit 73e7884df4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -29,6 +29,7 @@ import {
getCrawl,
getCrawlJobCount,
getCrawlJobs,
+getDoneJobsOrderedLength,
lockURL,
lockURLs,
lockURLsIndividually,
@@ -185,7 +186,7 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
);
}
} else {
-const num_docs = await getCrawlJobCount(job.data.crawl_id);
+const num_docs = await getDoneJobsOrderedLength(job.data.crawl_id);
const jobStatus = sc.cancelled ? "failed" : "completed";
await logJob(